Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r--   drivers/net/xen-netback/Makefile     |    1
-rw-r--r--   drivers/net/xen-netback/common.h     |   60
-rw-r--r--   drivers/net/xen-netback/hash.c       |   11
-rw-r--r--   drivers/net/xen-netback/interface.c  |  275
-rw-r--r--   drivers/net/xen-netback/netback.c    |  492
-rw-r--r--   drivers/net/xen-netback/rx.c         |  105
-rw-r--r--   drivers/net/xen-netback/xenbus.c     |  474
7 files changed, 820 insertions, 598 deletions
diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile index d49798a46b51..84e9cbc36359 100644 --- a/drivers/net/xen-netback/Makefile +++ b/drivers/net/xen-netback/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o xen-netback-y := netback.o xenbus.o interface.o hash.o rx.o diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 936c0b3e0ba2..17421da139f2 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -48,7 +48,6 @@ #include <linux/debugfs.h> typedef unsigned int pending_ring_idx_t; -#define INVALID_PENDING_RING_IDX (~0U) struct pending_tx_info { struct xen_netif_tx_request req; /* tx request */ @@ -63,7 +62,7 @@ struct pending_tx_info { * ubuf_to_vif is a helper which finds the struct xenvif from a pointer * to this field. */ - struct ubuf_info callback_struct; + struct ubuf_info_msgzc callback_struct; }; #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE) @@ -82,8 +81,6 @@ struct xenvif_rx_meta { /* Discriminate from any valid pending_idx value. */ #define INVALID_PENDING_IDX 0xFFFF -#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE - #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE /* The maximum number of frags is derived from the size of a grant (same @@ -140,6 +137,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */ char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */ struct xenvif *vif; /* Parent VIF */ + /* + * TX/RX common EOI handling. + * When feature-split-event-channels = 0, interrupt handler sets + * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set + * by the RX and TX interrupt handlers. + * RX and TX handler threads will issue an EOI when either + * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or + * NETBK_TX_EOI) are set and they will reset those bits. + */ + atomic_t eoi_pending; +#define NETBK_RX_EOI 0x01 +#define NETBK_TX_EOI 0x02 +#define NETBK_COMMON_EOI 0x04 + /* Use NAPI for guest TX */ struct napi_struct napi; /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ @@ -155,7 +166,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; - struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; + struct gnttab_copy tx_copy_ops[2 * MAX_PENDING_REQS]; struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; /* passed to gnttab_[un]map_refs with pages under (un)mapping */ @@ -189,6 +200,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int rx_queue_max; unsigned int rx_queue_len; unsigned long last_rx_time; + unsigned int rx_slots_needed; bool stalled; struct xenvif_copy_state rx_copy; @@ -248,6 +260,22 @@ struct xenvif_hash { struct xenvif_hash_cache cache; }; +struct backend_info { + struct xenbus_device *dev; + struct xenvif *vif; + + /* This is the state that will be reflected in xenstore when any + * active hotplug script completes. + */ + enum xenbus_state state; + + enum xenbus_state frontend_state; + struct xenbus_watch hotplug_status_watch; + u8 have_hotplug_status_watch:1; + + const char *hotplug_script; +}; + struct xenvif { /* Unique identifier for this interface. */ domid_t domid; @@ -265,6 +293,9 @@ struct xenvif { u8 ipv6_csum:1; u8 multicast_control:1; + /* headroom requested by xen-netfront */ + u16 xdp_headroom; + /* Is this interface disabled? 
True when backend discovers * frontend is rogue. */ @@ -283,6 +314,8 @@ struct xenvif { struct xenbus_watch credit_watch; struct xenbus_watch mcast_ctrl_watch; + struct backend_info *be; + spinlock_t lock; #ifdef CONFIG_DEBUG_FS @@ -331,11 +364,6 @@ void xenvif_free(struct xenvif *vif); int xenvif_xenbus_init(void); void xenvif_xenbus_fini(void); -int xenvif_schedulable(struct xenvif *vif); - -int xenvif_queue_stopped(struct xenvif_queue *queue); -void xenvif_wake_queue(struct xenvif_queue *queue); - /* (Un)Map communication rings. */ void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue); int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, @@ -357,16 +385,13 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); -void xenvif_rx_action(struct xenvif_queue *queue); -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); +bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); -/* Callback from stack when TX packet can be released */ -void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); - -/* Unmap a pending page and release it back to the guest */ -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); +/* Callbacks from stack when TX packet can be released */ +extern const struct ubuf_info_ops xenvif_ubuf_ops; static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) { @@ -377,6 +402,7 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) irqreturn_t xenvif_interrupt(int irq, void *dev_id); extern bool separate_tx_rx_irq; +extern bool provides_xdp_headroom; extern unsigned int rx_drain_timeout_msecs; extern unsigned int rx_stall_timeout_msecs; diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 0ccb021f1e78..45ddce35f6d2 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -51,7 +51,8 @@ static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, found = false; oldest = NULL; - list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) { + list_for_each_entry_rcu(entry, &vif->hash.cache.list, link, + lockdep_is_held(&vif->hash.cache.lock)) { /* Make sure we don't add duplicate entries */ if (entry->len == len && memcmp(entry->tag, tag, len) == 0) @@ -94,7 +95,7 @@ static u32 xenvif_new_hash(struct xenvif *vif, const u8 *data, static void xenvif_flush_hash(struct xenvif *vif) { - struct xenvif_hash_cache_entry *entry; + struct xenvif_hash_cache_entry *entry, *n; unsigned long flags; if (xenvif_hash_cache_size == 0) @@ -102,7 +103,7 @@ static void xenvif_flush_hash(struct xenvif *vif) spin_lock_irqsave(&vif->hash.cache.lock, flags); - list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) { + list_for_each_entry_safe(entry, n, &vif->hash.cache.list, link) { list_del_rcu(&entry->link); vif->hash.cache.count--; kfree_rcu(entry, rcu); @@ -391,7 +392,7 @@ void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE: seq_puts(m, "Hash Algorithm: NONE\n"); - /* FALLTHRU */ + fallthrough; default: return; } @@ -454,6 +455,8 @@ void xenvif_init_hash(struct xenvif *vif) if (xenvif_hash_cache_size == 0) return; + BUG_ON(vif->hash.cache.count); + spin_lock_init(&vif->hash.cache.lock); INIT_LIST_HEAD(&vif->hash.cache.list); } diff --git a/drivers/net/xen-netback/interface.c 
b/drivers/net/xen-netback/interface.c index 182d6770f102..a0a438881388 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -41,13 +41,10 @@ #include <asm/xen/hypercall.h> #include <xen/balloon.h> -#define XENVIF_QUEUE_LENGTH 32 -#define XENVIF_NAPI_WEIGHT 64 - /* Number of bytes allowed on the internal guest Rx queue. */ #define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE) -/* This function is used to set SKBTX_DEV_ZEROCOPY as well as +/* This function is used to set SKBFL_ZEROCOPY_ENABLE as well as * increasing the inflight counter. We need to increase the inflight * counter because core driver calls into xenvif_zerocopy_callback * which calls xenvif_skb_zerocopy_complete. @@ -55,7 +52,7 @@ void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue, struct sk_buff *skb) { - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + skb_shinfo(skb)->flags |= SKBFL_ZEROCOPY_ENABLE; atomic_inc(&queue->inflight_packets); } @@ -70,19 +67,35 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) wake_up(&queue->dealloc_wq); } -int xenvif_schedulable(struct xenvif *vif) +static int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && test_bit(VIF_STATUS_CONNECTED, &vif->status) && !vif->disabled; } +static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue) +{ + bool rc; + + rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); + if (rc) + napi_schedule(&queue->napi); + return rc; +} + static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + int old; - if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)) - napi_schedule(&queue->napi); + old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending); + WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n"); + + if (!xenvif_handle_tx_interrupt(queue)) { + atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } @@ -116,46 +129,68 @@ static int xenvif_poll(struct napi_struct *napi, int budget) return work_done; } +static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue) +{ + bool rc; + + rc = xenvif_have_rx_work(queue, false); + if (rc) + xenvif_kick_thread(queue); + return rc; +} + static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + int old; - xenvif_kick_thread(queue); + old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending); + WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n"); + + if (!xenvif_handle_rx_interrupt(queue)) { + atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } irqreturn_t xenvif_interrupt(int irq, void *dev_id) { - xenvif_tx_interrupt(irq, dev_id); - xenvif_rx_interrupt(irq, dev_id); + struct xenvif_queue *queue = dev_id; + int old; + bool has_rx, has_tx; - return IRQ_HANDLED; -} + old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending); + WARN(old, "Interrupt while EOI pending\n"); -int xenvif_queue_stopped(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id)); -} + has_tx = xenvif_handle_tx_interrupt(queue); + has_rx = xenvif_handle_rx_interrupt(queue); -void xenvif_wake_queue(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); + if (!has_rx && !has_tx) { + 
atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } + + return IRQ_HANDLED; } static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct xenvif *vif = netdev_priv(dev); unsigned int size = vif->hash.size; + unsigned int num_queues; + + /* If queues are not set up internally - always return 0 + * as the packet going to be dropped anyway */ + num_queues = READ_ONCE(vif->num_queues); + if (num_queues < 1) + return 0; if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) - return fallback(dev, skb, NULL) % dev->real_num_tx_queues; + return netdev_pick_tx(dev, skb, NULL) % + dev->real_num_tx_queues; xenvif_set_skb_hash(vif, skb); @@ -217,14 +252,19 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb); - xenvif_rx_queue_tail(queue, skb); + /* timestamp packet in software */ + skb_tx_timestamp(skb); + + if (!xenvif_rx_queue_tail(queue, skb)) + goto drop; + xenvif_kick_thread(queue); return NETDEV_TX_OK; drop: vif->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -289,7 +329,7 @@ static void xenvif_down(struct xenvif *vif) if (queue->tx_irq != queue->rx_irq) disable_irq(queue->rx_irq); napi_disable(&queue->napi); - del_timer_sync(&queue->credit_timeout); + timer_delete_sync(&queue->credit_timeout); } } @@ -318,7 +358,7 @@ static int xenvif_change_mtu(struct net_device *dev, int mtu) if (mtu > max) return -EINVAL; - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); return 0; } @@ -421,7 +461,7 @@ static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data) static const struct ethtool_ops xenvif_ethtool_ops = { .get_link = ethtool_op_get_link, - + .get_ts_info = ethtool_op_get_ts_info, .get_sset_count = xenvif_get_sset_count, .get_ethtool_stats = xenvif_get_ethtool_stats, .get_strings = xenvif_get_strings, @@ -442,6 +482,9 @@ static const struct net_device_ops xenvif_netdev_ops = { struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, unsigned int handle) { + static const u8 dummy_addr[ETH_ALEN] = { + 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, + }; int err; struct net_device *dev; struct xenvif *vif; @@ -476,6 +519,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->queues = NULL; vif->num_queues = 0; + vif->xdp_headroom = 0; + spin_lock_init(&vif->lock); INIT_LIST_HEAD(&vif->fe_mcast_addr); @@ -486,8 +531,6 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, dev->features = dev->hw_features | NETIF_F_RXCSUM; dev->ethtool_ops = &xenvif_ethtool_ops; - dev->tx_queue_len = XENVIF_QUEUE_LENGTH; - dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_MAX_MTU - VLAN_ETH_HLEN; @@ -497,8 +540,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, * stolen by an Ethernet bridge for STP purposes. 
* (FE:FF:FF:FF:FF:FF) */ - eth_broadcast_addr(dev->dev_addr); - dev->dev_addr[0] &= ~0x01; + eth_hw_addr_set(dev, dummy_addr); netif_carrier_off(dev); @@ -550,8 +592,8 @@ int xenvif_init_queue(struct xenvif_queue *queue) } for (i = 0; i < MAX_PENDING_REQS; i++) { - queue->pending_tx_info[i].callback_struct = (struct ubuf_info) - { .callback = xenvif_zerocopy_callback, + queue->pending_tx_info[i].callback_struct = (struct ubuf_info_msgzc) + { { .ops = &xenvif_ubuf_ops }, { { .ctx = NULL, .desc = i } } }; queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE; @@ -576,19 +618,27 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, unsigned int evtchn) { struct net_device *dev = vif->dev; + struct xenbus_device *xendev = xenvif_to_xenbus_device(vif); void *addr; struct xen_netif_ctrl_sring *shared; + RING_IDX rsp_prod, req_prod; int err; - err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), - &ring_ref, 1, &addr); + err = xenbus_map_ring_valloc(xendev, &ring_ref, 1, &addr); if (err) goto err; shared = (struct xen_netif_ctrl_sring *)addr; - BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE); + rsp_prod = READ_ONCE(shared->rsp_prod); + req_prod = READ_ONCE(shared->req_prod); + + BACK_RING_ATTACH(&vif->ctrl, shared, rsp_prod, XEN_PAGE_SIZE); + + err = -EIO; + if (req_prod - rsp_prod > RING_SIZE(&vif->ctrl)) + goto err_unmap; - err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(xendev, evtchn); if (err < 0) goto err_unmap; @@ -611,22 +661,54 @@ err_deinit: vif->ctrl_irq = 0; err_unmap: - xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), - vif->ctrl.sring); + xenbus_unmap_ring_vfree(xendev, vif->ctrl.sring); vif->ctrl.sring = NULL; err: return err; } +static void xenvif_disconnect_queue(struct xenvif_queue *queue) +{ + if (queue->task) { + kthread_stop_put(queue->task); + queue->task = NULL; + } + + if (queue->dealloc_task) { + kthread_stop(queue->dealloc_task); + queue->dealloc_task = NULL; + } + + if (queue->napi.poll) { + netif_napi_del(&queue->napi); + queue->napi.poll = NULL; + } + + if (queue->tx_irq) { + unbind_from_irqhandler(queue->tx_irq, queue); + if (queue->tx_irq == queue->rx_irq) + queue->rx_irq = 0; + queue->tx_irq = 0; + } + + if (queue->rx_irq) { + unbind_from_irqhandler(queue->rx_irq, queue); + queue->rx_irq = 0; + } + + xenvif_unmap_frontend_data_rings(queue); +} + int xenvif_connect_data(struct xenvif_queue *queue, unsigned long tx_ring_ref, unsigned long rx_ring_ref, unsigned int tx_evtchn, unsigned int rx_evtchn) { + struct xenbus_device *dev = xenvif_to_xenbus_device(queue->vif); struct task_struct *task; - int err = -ENOMEM; + int err; BUG_ON(queue->tx_irq); BUG_ON(queue->task); @@ -641,78 +723,66 @@ int xenvif_connect_data(struct xenvif_queue *queue, init_waitqueue_head(&queue->dealloc_wq); atomic_set(&queue->inflight_packets, 0); - netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll, - XENVIF_NAPI_WEIGHT); + netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll); + + queue->stalled = true; + + task = kthread_run(xenvif_kthread_guest_rx, queue, + "%s-guest-rx", queue->name); + if (IS_ERR(task)) + goto kthread_err; + queue->task = task; + /* + * Take a reference to the task in order to prevent it from being freed + * if the thread function returns before kthread_stop is called. 
+ */ + get_task_struct(task); + + task = kthread_run(xenvif_dealloc_kthread, queue, + "%s-dealloc", queue->name); + if (IS_ERR(task)) + goto kthread_err; + queue->dealloc_task = task; if (tx_evtchn == rx_evtchn) { /* feature-split-event-channels == 0 */ - err = bind_interdomain_evtchn_to_irqhandler( - queue->vif->domid, tx_evtchn, xenvif_interrupt, 0, + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( + dev, tx_evtchn, xenvif_interrupt, 0, queue->name, queue); if (err < 0) - goto err_unmap; + goto err; queue->tx_irq = queue->rx_irq = err; disable_irq(queue->tx_irq); } else { /* feature-split-event-channels == 1 */ snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_interdomain_evtchn_to_irqhandler( - queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0, + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( + dev, tx_evtchn, xenvif_tx_interrupt, 0, queue->tx_irq_name, queue); if (err < 0) - goto err_unmap; + goto err; queue->tx_irq = err; disable_irq(queue->tx_irq); snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_interdomain_evtchn_to_irqhandler( - queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0, + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( + dev, rx_evtchn, xenvif_rx_interrupt, 0, queue->rx_irq_name, queue); if (err < 0) - goto err_tx_unbind; + goto err; queue->rx_irq = err; disable_irq(queue->rx_irq); } - queue->stalled = true; - - task = kthread_create(xenvif_kthread_guest_rx, - (void *)queue, "%s-guest-rx", queue->name); - if (IS_ERR(task)) { - pr_warn("Could not allocate kthread for %s\n", queue->name); - err = PTR_ERR(task); - goto err_rx_unbind; - } - queue->task = task; - get_task_struct(task); - - task = kthread_create(xenvif_dealloc_kthread, - (void *)queue, "%s-dealloc", queue->name); - if (IS_ERR(task)) { - pr_warn("Could not allocate kthread for %s\n", queue->name); - err = PTR_ERR(task); - goto err_rx_unbind; - } - queue->dealloc_task = task; - - wake_up_process(queue->task); - wake_up_process(queue->dealloc_task); - return 0; -err_rx_unbind: - unbind_from_irqhandler(queue->rx_irq, queue); - queue->rx_irq = 0; -err_tx_unbind: - unbind_from_irqhandler(queue->tx_irq, queue); - queue->tx_irq = 0; -err_unmap: - xenvif_unmap_frontend_data_rings(queue); - netif_napi_del(&queue->napi); +kthread_err: + pr_warn("Could not allocate kthread for %s\n", queue->name); + err = PTR_ERR(task); err: - module_put(THIS_MODULE); + xenvif_disconnect_queue(queue); return err; } @@ -740,30 +810,7 @@ void xenvif_disconnect_data(struct xenvif *vif) for (queue_index = 0; queue_index < num_queues; ++queue_index) { queue = &vif->queues[queue_index]; - netif_napi_del(&queue->napi); - - if (queue->task) { - kthread_stop(queue->task); - put_task_struct(queue->task); - queue->task = NULL; - } - - if (queue->dealloc_task) { - kthread_stop(queue->dealloc_task); - queue->dealloc_task = NULL; - } - - if (queue->tx_irq) { - if (queue->tx_irq == queue->rx_irq) - unbind_from_irqhandler(queue->tx_irq, queue); - else { - unbind_from_irqhandler(queue->tx_irq, queue); - unbind_from_irqhandler(queue->rx_irq, queue); - } - queue->tx_irq = 0; - } - - xenvif_unmap_frontend_data_rings(queue); + xenvif_disconnect_queue(queue); } xenvif_mcast_addr_list_free(vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 80aae3a32c2a..c759ebc56457 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -38,6 +38,7 @@ #include <linux/if_vlan.h> #include 
<linux/udp.h> #include <linux/highmem.h> +#include <linux/skbuff_ref.h> #include <net/tcp.h> @@ -96,14 +97,22 @@ unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT; module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644); MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache"); +/* The module parameter tells that we have to put data + * for xen-netfront with the XDP_PACKET_HEADROOM offset + * needed for XDP processing + */ +bool provides_xdp_headroom = true; +module_param(provides_xdp_headroom, bool, 0644); + static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); + +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); static inline int tx_work_todo(struct xenvif_queue *queue); @@ -124,7 +133,7 @@ static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue, /* Find the containing VIF's structure from a pointer in pending_tx_info array */ -static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf) +static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info_msgzc *ubuf) { u16 pending_idx = ubuf->desc; struct pending_tx_info *temp = @@ -136,12 +145,12 @@ static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf) static u16 frag_get_pending_idx(skb_frag_t *frag) { - return (u16)frag->page_offset; + return (u16)skb_frag_off(frag); } static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) { - frag->page_offset = pending_idx; + skb_frag_off_set(frag, pending_idx); } static inline pending_ring_idx_t pending_index(unsigned i) @@ -162,6 +171,10 @@ void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue) if (more_to_do) napi_schedule(&queue->napi); + else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI, + &queue->eoi_pending) & + (NETBK_TX_EOI | NETBK_COMMON_EOI)) + xen_irq_lateeoi(queue->tx_irq, 0); } static void tx_add_credit(struct xenvif_queue *queue) @@ -185,7 +198,8 @@ static void tx_add_credit(struct xenvif_queue *queue) void xenvif_tx_credit_callback(struct timer_list *t) { - struct xenvif_queue *queue = from_timer(queue, t, credit_timeout); + struct xenvif_queue *queue = timer_container_of(queue, t, + credit_timeout); tx_add_credit(queue); xenvif_napi_schedule_or_enable_events(queue); } @@ -195,13 +209,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -319,10 +329,14 @@ static int xenvif_count_requests(struct xenvif_queue *queue, struct xenvif_tx_cb { - u16 pending_idx; + u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; + u8 copy_count; + u32 split_mask; }; #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) +#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) +#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count) static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, u16 pending_idx, @@ -345,6 +359,8 @@ static 
inline struct sk_buff *xenvif_alloc_skb(unsigned int size) struct sk_buff *skb = alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC | __GFP_NOWARN); + + BUILD_BUG_ON(sizeof(*XENVIF_TX_CB(skb)) > sizeof(skb->cb)); if (unlikely(skb == NULL)) return NULL; @@ -357,52 +373,152 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) return skb; } -static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, - struct sk_buff *skb, - struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop, - unsigned int frag_overflow, - struct sk_buff *nskb) +static void xenvif_get_requests(struct xenvif_queue *queue, + struct sk_buff *skb, + struct xen_netif_tx_request *first, + struct xen_netif_tx_request *txfrags, + unsigned *copy_ops, + unsigned *map_ops, + unsigned int frag_overflow, + struct sk_buff *nskb, + unsigned int extra_count, + unsigned int data_len) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; - int start; + u16 pending_idx; pending_ring_idx_t index; unsigned int nr_slots; + struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; + struct xen_netif_tx_request *txp = first; + + nr_slots = shinfo->nr_frags + frag_overflow + 1; + + copy_count(skb) = 0; + XENVIF_TX_CB(skb)->split_mask = 0; + + /* Create copy ops for exactly data_len bytes into the skb head. */ + __skb_put(skb, data_len); + while (data_len > 0) { + int amount = data_len > txp->size ? txp->size : data_len; + bool split = false; + + cop->source.u.ref = txp->gref; + cop->source.domid = queue->vif->domid; + cop->source.offset = txp->offset; + + cop->dest.domid = DOMID_SELF; + cop->dest.offset = (offset_in_page(skb->data + + skb_headlen(skb) - + data_len)) & ~XEN_PAGE_MASK; + cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) + - data_len); + + /* Don't cross local page boundary! */ + if (cop->dest.offset + amount > XEN_PAGE_SIZE) { + amount = XEN_PAGE_SIZE - cop->dest.offset; + XENVIF_TX_CB(skb)->split_mask |= 1U << copy_count(skb); + split = true; + } - nr_slots = shinfo->nr_frags; + cop->len = amount; + cop->flags = GNTCOPY_source_gref; + + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + callback_param(queue, pending_idx).ctx = NULL; + copy_pending_idx(skb, copy_count(skb)) = pending_idx; + if (!split) + copy_count(skb)++; + + cop++; + data_len -= amount; + + if (amount == txp->size) { + /* The copy op covered the full tx_request */ + + memcpy(&queue->pending_tx_info[pending_idx].req, + txp, sizeof(*txp)); + queue->pending_tx_info[pending_idx].extra_count = + (txp == first) ? extra_count : 0; + + if (txp == first) + txp = txfrags; + else + txp++; + queue->pending_cons++; + nr_slots--; + } else { + /* The copy op partially covered the tx_request. + * The remainder will be mapped or copied in the next + * iteration. + */ + txp->offset += amount; + txp->size -= amount; + } + } - /* Skip first skb fragment if it is on same page as header fragment. 
*/ - start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); + for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; + nr_slots--) { + if (unlikely(!txp->size)) { + make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); + ++txp; + continue; + } - for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; - xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); + xenvif_tx_create_map_op(queue, pending_idx, txp, + txp == first ? extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + ++shinfo->nr_frags; + ++gop; + + if (txp == first) + txp = txfrags; + else + txp++; } - if (frag_overflow) { + if (nr_slots > 0) { shinfo = skb_shinfo(nskb); frags = shinfo->frags; - for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow; - shinfo->nr_frags++, txp++, gop++) { + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { + if (unlikely(!txp->size)) { + make_tx_response(queue, txp, 0, + XEN_NETIF_RSP_OKAY); + continue; + } + index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + ++shinfo->nr_frags; + ++gop; } - skb_shinfo(skb)->frag_list = nskb; + if (shinfo->nr_frags) { + skb_shinfo(skb)->frag_list = nskb; + nskb = NULL; + } + } + + if (nskb) { + /* A frag_list skb was allocated but it is no longer needed + * because enough slots were converted to copy ops above or some + * were empty. + */ + kfree_skb(nskb); } - return gop; + (*copy_ops) = cop - queue->tx_copy_ops; + (*map_ops) = gop - queue->tx_map_ops; } static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, @@ -438,7 +554,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct gnttab_copy **gopp_copy) { struct gnttab_map_grant_ref *gop_map = *gopp_map; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + u16 pending_idx; /* This always points to the shinfo of the skb being checked, which * could be either the first or the one on the frag_list */ @@ -449,24 +565,44 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; const bool sharedslot = nr_frags && - frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; - int i, err; + frag_get_pending_idx(&shinfo->frags[0]) == + copy_pending_idx(skb, copy_count(skb) - 1); + int i, err = 0; - /* Check status of header. */ - err = (*gopp_copy)->status; - if (unlikely(err)) { - if (net_ratelimit()) - netdev_dbg(queue->vif->dev, - "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", - (*gopp_copy)->status, - pending_idx, - (*gopp_copy)->source.u.ref); - /* The first frag might still have this slot mapped */ - if (!sharedslot) - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_ERROR); + for (i = 0; i < copy_count(skb); i++) { + int newerr; + + /* Check status of header. */ + pending_idx = copy_pending_idx(skb, i); + + newerr = (*gopp_copy)->status; + + /* Split copies need to be handled together. 
*/ + if (XENVIF_TX_CB(skb)->split_mask & (1U << i)) { + (*gopp_copy)++; + if (!newerr) + newerr = (*gopp_copy)->status; + } + if (likely(!newerr)) { + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } else { + err = newerr; + if (net_ratelimit()) + netdev_dbg(queue->vif->dev, + "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", + (*gopp_copy)->status, + pending_idx, + (*gopp_copy)->source.u.ref); + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + } + (*gopp_copy)++; } - (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { @@ -488,7 +624,7 @@ check_frags: * the header's copy failed, and they are * sharing a slot, send an error */ - if (i == 0 && sharedslot) + if (i == 0 && !first_shinfo && sharedslot) xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); else @@ -513,14 +649,6 @@ check_frags: if (err) continue; - /* First error: if the header haven't shared a slot with the - * first frag, release it as well. - */ - if (!sharedslot) - xenvif_idx_release(queue, - XENVIF_TX_CB(skb)->pending_idx, - XEN_NETIF_RSP_OKAY); - /* Invalidate preceding fragments of this skb. */ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); @@ -546,8 +674,8 @@ check_frags: } if (skb_has_frag_list(skb) && !first_shinfo) { - first_shinfo = skb_shinfo(skb); - shinfo = skb_shinfo(skb_shinfo(skb)->frag_list); + first_shinfo = shinfo; + shinfo = skb_shinfo(shinfo->frag_list); nr_frags = shinfo->nr_frags; goto check_frags; @@ -584,7 +712,7 @@ static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb) prev_pending_idx = pending_idx; txp = &queue->pending_tx_info[pending_idx].req; - page = virt_to_page(idx_to_kaddr(queue, pending_idx)); + page = virt_to_page((void *)idx_to_kaddr(queue, pending_idx)); __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); skb->len += txp->size; skb->data_len += txp->size; @@ -790,7 +918,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops; struct sk_buff *skb, *nskb; int ret; unsigned int frag_overflow; @@ -800,11 +927,9 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; unsigned int extra_count; - u16 pending_idx; RING_IDX idx; int work_to_do; unsigned int data_len; - pending_ring_idx_t index; if (queue->tx.sring->req_prod - queue->tx.req_cons > XEN_NETIF_TX_RING_SIZE) { @@ -817,7 +942,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, break; } - work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); + work_to_do = XEN_RING_NR_UNCONSUMED_REQUESTS(&queue->tx); if (!work_to_do) break; @@ -856,7 +981,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -868,12 +992,15 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } + data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? 
+ XEN_NETBACK_TX_COPY_LEN : txreq.size; + ret = xenvif_count_requests(queue, &txreq, extra_count, txfrags, work_to_do); + if (unlikely(ret < 0)) break; @@ -888,20 +1015,14 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, /* No crossing a page as the payload mustn't fragment. */ if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) { - netdev_err(queue->vif->dev, - "txreq.offset: %u, size: %u, end: %lu\n", - txreq.offset, txreq.size, - (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size); + netdev_err(queue->vif->dev, "Cross page boundary, txreq.offset: %u, size: %u\n", + txreq.offset, txreq.size); xenvif_fatal_tx_err(queue->vif); break; } - index = pending_index(queue->pending_cons); - pending_idx = queue->pending_ring[index]; - - data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && - ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? - XEN_NETBACK_TX_COPY_LEN : txreq.size; + if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) + data_len = txreq.size; skb = xenvif_alloc_skb(data_len); if (unlikely(skb == NULL)) { @@ -912,8 +1033,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, } skb_shinfo(skb)->nr_frags = ret; - if (data_len < txreq.size) - skb_shinfo(skb)->nr_frags++; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. */ @@ -925,6 +1044,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; nskb = xenvif_alloc_skb(0); if (unlikely(nskb == NULL)) { + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); xenvif_tx_err(queue, &txreq, extra_count, idx); if (net_ratelimit()) @@ -940,6 +1060,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. 
*/ + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); kfree_skb(nskb); break; @@ -973,54 +1094,15 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, type); } - XENVIF_TX_CB(skb)->pending_idx = pending_idx; - - __skb_put(skb, data_len); - queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; - queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; - queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; - - queue->tx_copy_ops[*copy_ops].dest.u.gmfn = - virt_to_gfn(skb->data); - queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; - queue->tx_copy_ops[*copy_ops].dest.offset = - offset_in_page(skb->data) & ~XEN_PAGE_MASK; - - queue->tx_copy_ops[*copy_ops].len = data_len; - queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; - - (*copy_ops)++; - - if (data_len < txreq.size) { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - pending_idx); - xenvif_tx_create_map_op(queue, pending_idx, &txreq, - extra_count, gop); - gop++; - } else { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - INVALID_PENDING_IDX); - memcpy(&queue->pending_tx_info[pending_idx].req, - &txreq, sizeof(txreq)); - queue->pending_tx_info[pending_idx].extra_count = - extra_count; - } - - queue->pending_cons++; - - gop = xenvif_get_requests(queue, skb, txfrags, gop, - frag_overflow, nskb); + xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, + map_ops, frag_overflow, nskb, extra_count, + data_len); __skb_queue_tail(&queue->tx_queue, skb); queue->tx.req_cons = idx; - - if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || - (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) - break; } - (*map_ops) = gop - queue->tx_map_ops; return; } @@ -1055,7 +1137,7 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s int j; skb->truesize += skb->data_len; for (j = 0; j < i; j++) - put_page(frags[j].page.p); + put_page(skb_frag_page(&frags[j])); return -ENOMEM; } @@ -1067,23 +1149,16 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s BUG(); offset += len; - frags[i].page.p = page; - frags[i].page_offset = 0; - skb_frag_size_set(&frags[i], len); + skb_frag_fill_page_desc(&frags[i], page, 0, len); } - /* Copied all the bits from the frag list -- free it. */ - skb_frag_list_init(skb); - xenvif_skb_zerocopy_prepare(queue, nskb); - kfree_skb(nskb); - /* Release all the original (foreign) frags. */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) skb_frag_unref(skb, f); uarg = skb_shinfo(skb)->destructor_arg; /* increase inflight counter to offset decrement in callback */ atomic_inc(&queue->inflight_packets); - uarg->callback(uarg, true); + uarg->ops->complete(NULL, uarg, true); skb_shinfo(skb)->destructor_arg = NULL; /* Fill the skb with the new (local) frags. */ @@ -1104,9 +1179,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { struct xen_netif_tx_request *txp; u16 pending_idx; - unsigned data_len; - pending_idx = XENVIF_TX_CB(skb)->pending_idx; + pending_idx = copy_pending_idx(skb, 0); txp = &queue->pending_tx_info[pending_idx].req; /* Check the remap error code. */ @@ -1125,18 +1199,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) continue; } - data_len = skb->len; - callback_param(queue, pending_idx).ctx = NULL; - if (data_len < txp->size) { - /* Append the packet payload as a fragment. */ - txp->offset += data_len; - txp->size -= data_len; - } else { - /* Schedule a response immediately. 
*/ - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_OKAY); - } - if (txp->flags & XEN_NETTXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (txp->flags & XEN_NETTXF_data_validated) @@ -1145,6 +1207,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) xenvif_fill_frags(queue, skb); if (unlikely(skb_has_frag_list(skb))) { + struct sk_buff *nskb = skb_shinfo(skb)->frag_list; + xenvif_skb_zerocopy_prepare(queue, nskb); if (xenvif_handle_frag_list(queue, skb)) { if (net_ratelimit()) netdev_err(queue->vif->dev, @@ -1153,6 +1217,9 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) kfree_skb(skb); continue; } + /* Copied all the bits from the frag list -- free it. */ + skb_frag_list_init(skb); + kfree_skb(nskb); } skb->dev = queue->vif->dev; @@ -1169,17 +1236,24 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) continue; } - skb_probe_transport_header(skb, 0); + skb_probe_transport_header(skb); /* If the packet is GSO then we will have just set up the * transport header offset in checksum_setup so it's now * straightforward to calculate gso_segs. */ if (skb_is_gso(skb)) { - int mss = skb_shinfo(skb)->gso_size; - int hdrlen = skb_transport_header(skb) - - skb_mac_header(skb) + - tcp_hdrlen(skb); + int mss, hdrlen; + + /* GSO implies having the L4 header. */ + WARN_ON_ONCE(!skb_transport_header_was_set(skb)); + if (unlikely(!skb_transport_header_was_set(skb))) { + kfree_skb(skb); + continue; + } + + mss = skb_shinfo(skb)->gso_size; + hdrlen = skb_tcp_all_headers(skb); skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len - hdrlen, mss); @@ -1206,10 +1280,13 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) return work_done; } -void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) +static void xenvif_zerocopy_callback(struct sk_buff *skb, + struct ubuf_info *ubuf_base, + bool zerocopy_success) { unsigned long flags; pending_ring_idx_t index; + struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base); struct xenvif_queue *queue = ubuf_to_queue(ubuf); /* This is the only place where we grab this lock, to protect callbacks @@ -1218,7 +1295,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) spin_lock_irqsave(&queue->callback_lock, flags); do { u16 pending_idx = ubuf->desc; - ubuf = (struct ubuf_info *) ubuf->ctx; + ubuf = (struct ubuf_info_msgzc *) ubuf->ctx; BUG_ON(queue->dealloc_prod - queue->dealloc_cons >= MAX_PENDING_REQS); index = pending_index(queue->dealloc_prod); @@ -1238,6 +1315,10 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) xenvif_skb_zerocopy_complete(queue); } +const struct ubuf_info_ops xenvif_ubuf_ops = { + .complete = xenvif_zerocopy_callback, +}; + static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) { struct gnttab_unmap_grant_ref *gop; @@ -1308,7 +1389,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) /* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif_queue *queue, int budget) { - unsigned nr_mops, nr_cops = 0; + unsigned nr_mops = 0, nr_cops = 0; int work_done, ret; if (unlikely(!tx_work_todo(queue))) @@ -1325,7 +1406,15 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) NULL, queue->pages_to_map, nr_mops); - BUG_ON(ret); + if (ret) { + unsigned int i; + + netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n", + nr_mops, ret); + for (i = 0; i < nr_mops; ++i) + WARN_ON_ONCE(queue->tx_map_ops[i].status == + GNTST_okay); + } } work_done = xenvif_tx_submit(queue); @@ 
-1333,8 +1422,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + RING_IDX i = queue->tx.rsp_prod_pvt; + struct xen_netif_tx_response *resp; + + resp = RING_GET_RESPONSE(&queue->tx, i); + resp->id = txp->id; + resp->status = status; + + while (extra_count-- != 0) + RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + + queue->tx.rsp_prod_pvt = ++i; +} + +static void push_tx_responses(struct xenvif_queue *queue) +{ + int notify; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); + if (notify) + notify_remote_via_irq(queue->tx_irq); +} + static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) + s8 status) { struct pending_tx_info *pending_tx_info; pending_ring_idx_t index; @@ -1344,8 +1460,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_lock_irqsave(&queue->response_lock, flags); - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); /* Release the pending index before pusing the Tx response so * its available before a new Tx request is pushed by the @@ -1359,35 +1475,22 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_unlock_irqrestore(&queue->response_lock, flags); } - static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { - RING_IDX i = queue->tx.rsp_prod_pvt; - struct xen_netif_tx_response *resp; - - resp = RING_GET_RESPONSE(&queue->tx, i); - resp->id = txp->id; - resp->status = st; - - while (extra_count-- != 0) - RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + unsigned long flags; - queue->tx.rsp_prod_pvt = ++i; -} + spin_lock_irqsave(&queue->response_lock, flags); -static void push_tx_responses(struct xenvif_queue *queue) -{ - int notify; + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); - if (notify) - notify_remote_via_irq(queue->tx_irq); + spin_unlock_irqrestore(&queue->response_lock, flags); } -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; struct gnttab_unmap_grant_ref tx_unmap_op; @@ -1442,8 +1545,8 @@ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, void *addr; struct xen_netif_tx_sring *txs; struct xen_netif_rx_sring *rxs; - - int err = -ENOMEM; + RING_IDX rsp_prod, req_prod; + int err; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), &tx_ring_ref, 1, &addr); @@ -1451,7 +1554,14 @@ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, goto err; txs = (struct xen_netif_tx_sring *)addr; - BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE); + rsp_prod = READ_ONCE(txs->rsp_prod); + req_prod = READ_ONCE(txs->req_prod); + + BACK_RING_ATTACH(&queue->tx, txs, rsp_prod, XEN_PAGE_SIZE); + + err = -EIO; + if (req_prod - rsp_prod > RING_SIZE(&queue->tx)) + goto err; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), &rx_ring_ref, 1, &addr); @@ -1459,7 +1569,14 @@ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, goto err; rxs = (struct xen_netif_rx_sring 
*)addr; - BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); + rsp_prod = READ_ONCE(rxs->rsp_prod); + req_prod = READ_ONCE(rxs->req_prod); + + BACK_RING_ATTACH(&queue->rx, rxs, rsp_prod, XEN_PAGE_SIZE); + + err = -EIO; + if (req_prod - rsp_prod > RING_SIZE(&queue->rx)) + goto err; return 0; @@ -1611,9 +1728,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif) irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) { struct xenvif *vif = data; + unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS; - while (xenvif_ctrl_work_todo(vif)) + while (xenvif_ctrl_work_todo(vif)) { xenvif_ctrl_action(vif); + eoi_flag = 0; + } + + xen_irq_lateeoi(irq, eoi_flag); return IRQ_HANDLED; } @@ -1644,9 +1766,6 @@ static int __init netback_init(void) #ifdef CONFIG_DEBUG_FS xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); - if (IS_ERR_OR_NULL(xen_netback_dbg_root)) - pr_warn("Init of debugfs returned %ld!\n", - PTR_ERR(xen_netback_dbg_root)); #endif /* CONFIG_DEBUG_FS */ return 0; @@ -1666,5 +1785,6 @@ static void __exit netback_fini(void) } module_exit(netback_fini); +MODULE_DESCRIPTION("Xen backend network device module"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("xen-backend:vif"); diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index ef5887037b22..0ba754ebc5ba 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -33,22 +33,37 @@ #include <xen/xen.h> #include <xen/events.h> +/* + * Update the needed ring page slots for the first SKB queued. + * Note that any call sequence outside the RX thread calling this function + * needs to wake up the RX thread via a call of xenvif_kick_thread() + * afterwards in order to avoid a race with putting the thread to sleep. + */ +static void xenvif_update_needed_slots(struct xenvif_queue *queue, + const struct sk_buff *skb) +{ + unsigned int needed = 0; + + if (skb) { + needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); + if (skb_is_gso(skb)) + needed++; + if (skb->sw_hash) + needed++; + } + + WRITE_ONCE(queue->rx_slots_needed, needed); +} + static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) { RING_IDX prod, cons; - struct sk_buff *skb; - int needed; + unsigned int needed; - skb = skb_peek(&queue->rx_queue); - if (!skb) + needed = READ_ONCE(queue->rx_slots_needed); + if (!needed) return false; - needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); - if (skb_is_gso(skb)) - needed++; - if (skb->sw_hash) - needed++; - do { prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; @@ -67,22 +82,30 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) return false; } -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) { unsigned long flags; + bool ret = true; spin_lock_irqsave(&queue->rx_queue.lock, flags); - __skb_queue_tail(&queue->rx_queue, skb); - - queue->rx_queue_len += skb->len; - if (queue->rx_queue_len > queue->rx_queue_max) { + if (queue->rx_queue_len >= queue->rx_queue_max) { struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); + ret = false; + } else { + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + + return ret; } static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) @@ -93,6 +116,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct 
xenvif_queue *queue) skb = __skb_dequeue(&queue->rx_queue); if (skb) { + xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue)); + queue->rx_queue_len -= skb->len; if (queue->rx_queue_len < queue->rx_queue_max) { struct netdev_queue *txq; @@ -127,6 +152,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) break; xenvif_rx_dequeue(queue); kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; } } @@ -258,6 +284,19 @@ static void xenvif_rx_next_skb(struct xenvif_queue *queue, pkt->extra_count++; } + if (queue->vif->xdp_headroom) { + struct xen_netif_extra_info *extra; + + extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_XDP - 1]; + + memset(extra, 0, sizeof(struct xen_netif_extra_info)); + extra->u.xdp.headroom = queue->vif->xdp_headroom; + extra->type = XEN_NETIF_EXTRA_TYPE_XDP; + extra->flags = 0; + + pkt->extra_count++; + } + if (skb->sw_hash) { struct xen_netif_extra_info *extra; @@ -356,7 +395,7 @@ static void xenvif_rx_data_slot(struct xenvif_queue *queue, struct xen_netif_rx_request *req, struct xen_netif_rx_response *rsp) { - unsigned int offset = 0; + unsigned int offset = queue->vif->xdp_headroom; unsigned int flags; do { @@ -449,7 +488,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue) #define RX_BATCH_SIZE 64 -void xenvif_rx_action(struct xenvif_queue *queue) +static void xenvif_rx_action(struct xenvif_queue *queue) { struct sk_buff_head completed_skbs; unsigned int work_done = 0; @@ -458,6 +497,7 @@ void xenvif_rx_action(struct xenvif_queue *queue) queue->rx_copy.completed = &completed_skbs; while (xenvif_rx_ring_slots_available(queue) && + !skb_queue_empty(&queue->rx_queue) && work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; @@ -467,36 +507,40 @@ void xenvif_rx_action(struct xenvif_queue *queue) xenvif_rx_copy_flush(queue); } -static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue) { RING_IDX prod, cons; prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; + return prod - cons; +} + +static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue) +{ + unsigned int needed = READ_ONCE(queue->rx_slots_needed); + return !queue->stalled && - prod - cons < 1 && + xenvif_rx_queue_slots(queue) < needed && time_after(jiffies, queue->last_rx_time + queue->vif->stall_timeout); } static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) { - RING_IDX prod, cons; + unsigned int needed = READ_ONCE(queue->rx_slots_needed); - prod = queue->rx.sring->req_prod; - cons = queue->rx.req_cons; - - return queue->stalled && prod - cons >= 1; + return queue->stalled && xenvif_rx_queue_slots(queue) >= needed; } -static bool xenvif_have_rx_work(struct xenvif_queue *queue) +bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) { return xenvif_rx_ring_slots_available(queue) || (queue->vif->stall_timeout && (xenvif_rx_queue_stalled(queue) || xenvif_rx_queue_ready(queue))) || - kthread_should_stop() || + (test_kthread && kthread_should_stop()) || queue->vif->disabled; } @@ -527,15 +571,20 @@ static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) { DEFINE_WAIT(wait); - if (xenvif_have_rx_work(queue)) + if (xenvif_have_rx_work(queue, true)) return; for (;;) { long ret; prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); - if (xenvif_have_rx_work(queue)) + if (xenvif_have_rx_work(queue, true)) break; + if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI, + &queue->eoi_pending) & + (NETBK_RX_EOI | NETBK_COMMON_EOI)) + 
xen_irq_lateeoi(queue->rx_irq, 0); + ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); if (!ret) break; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 2625740bdc4a..a78a25b87240 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -1,43 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Xenbus code for netif backend * * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> * Copyright (C) 2005 XenSource Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "common.h" #include <linux/vmalloc.h> #include <linux/rtnetlink.h> -struct backend_info { - struct xenbus_device *dev; - struct xenvif *vif; - - /* This is the state that will be reflected in xenstore when any - * active hotplug script completes. - */ - enum xenbus_state state; - - enum xenbus_state frontend_state; - struct xenbus_watch hotplug_status_watch; - u8 have_hotplug_status_watch:1; - - const char *hotplug_script; -}; - static int connect_data_rings(struct backend_info *be, struct xenvif_queue *queue); static void connect(struct backend_info *be); @@ -198,240 +170,37 @@ DEFINE_SHOW_ATTRIBUTE(xenvif_ctrl); static void xenvif_debugfs_addif(struct xenvif *vif) { - struct dentry *pfile; int i; - if (IS_ERR_OR_NULL(xen_netback_dbg_root)) - return; - vif->xenvif_dbg_root = debugfs_create_dir(vif->dev->name, xen_netback_dbg_root); - if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root)) { - for (i = 0; i < vif->num_queues; ++i) { - char filename[sizeof("io_ring_q") + 4]; - - snprintf(filename, sizeof(filename), "io_ring_q%d", i); - pfile = debugfs_create_file(filename, - 0600, - vif->xenvif_dbg_root, - &vif->queues[i], - &xenvif_dbg_io_ring_ops_fops); - if (IS_ERR_OR_NULL(pfile)) - pr_warn("Creation of io_ring file returned %ld!\n", - PTR_ERR(pfile)); - } + for (i = 0; i < vif->num_queues; ++i) { + char filename[sizeof("io_ring_q") + 4]; - if (vif->ctrl_irq) { - pfile = debugfs_create_file("ctrl", - 0400, - vif->xenvif_dbg_root, - vif, - &xenvif_ctrl_fops); - if (IS_ERR_OR_NULL(pfile)) - pr_warn("Creation of ctrl file returned %ld!\n", - PTR_ERR(pfile)); - } - } else - netdev_warn(vif->dev, - "Creation of vif debugfs dir returned %ld!\n", - PTR_ERR(vif->xenvif_dbg_root)); + snprintf(filename, sizeof(filename), "io_ring_q%d", i); + debugfs_create_file(filename, 0600, vif->xenvif_dbg_root, + &vif->queues[i], + &xenvif_dbg_io_ring_ops_fops); + } + + if (vif->ctrl_irq) + debugfs_create_file("ctrl", 0400, vif->xenvif_dbg_root, vif, + &xenvif_ctrl_fops); } static void xenvif_debugfs_delif(struct xenvif *vif) { - if (IS_ERR_OR_NULL(xen_netback_dbg_root)) - return; - debugfs_remove_recursive(vif->xenvif_dbg_root); vif->xenvif_dbg_root = NULL; } #endif /* CONFIG_DEBUG_FS */ -static int netback_remove(struct xenbus_device *dev) -{ - struct backend_info *be = dev_get_drvdata(&dev->dev); - - set_backend_state(be, XenbusStateClosed); - - 
-	unregister_hotplug_status_watch(be);
-	if (be->vif) {
-		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-		xen_unregister_watchers(be->vif);
-		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
-		xenvif_free(be->vif);
-		be->vif = NULL;
-	}
-	kfree(be->hotplug_script);
-	kfree(be);
-	dev_set_drvdata(&dev->dev, NULL);
-	return 0;
-}
-
-
-/**
- * Entry point to this code when a new device is created. Allocate the basic
- * structures and switch to InitWait.
- */
-static int netback_probe(struct xenbus_device *dev,
-			 const struct xenbus_device_id *id)
-{
-	const char *message;
-	struct xenbus_transaction xbt;
-	int err;
-	int sg;
-	const char *script;
-	struct backend_info *be = kzalloc(sizeof(struct backend_info),
-					  GFP_KERNEL);
-	if (!be) {
-		xenbus_dev_fatal(dev, -ENOMEM,
-				 "allocating backend structure");
-		return -ENOMEM;
-	}
-
-	be->dev = dev;
-	dev_set_drvdata(&dev->dev, be);
-
-	be->state = XenbusStateInitialising;
-	err = xenbus_switch_state(dev, XenbusStateInitialising);
-	if (err)
-		goto fail;
-
-	sg = 1;
-
-	do {
-		err = xenbus_transaction_start(&xbt);
-		if (err) {
-			xenbus_dev_fatal(dev, err, "starting transaction");
-			goto fail;
-		}
-
-		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
-		if (err) {
-			message = "writing feature-sg";
-			goto abort_transaction;
-		}
-
-		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
-				    "%d", sg);
-		if (err) {
-			message = "writing feature-gso-tcpv4";
-			goto abort_transaction;
-		}
-
-		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
-				    "%d", sg);
-		if (err) {
-			message = "writing feature-gso-tcpv6";
-			goto abort_transaction;
-		}
-
-		/* We support partial checksum setup for IPv6 packets */
-		err = xenbus_printf(xbt, dev->nodename,
-				    "feature-ipv6-csum-offload",
-				    "%d", 1);
-		if (err) {
-			message = "writing feature-ipv6-csum-offload";
-			goto abort_transaction;
-		}
-
-		/* We support rx-copy path. */
-		err = xenbus_printf(xbt, dev->nodename,
-				    "feature-rx-copy", "%d", 1);
-		if (err) {
-			message = "writing feature-rx-copy";
-			goto abort_transaction;
-		}
-
-		/*
-		 * We don't support rx-flip path (except old guests who don't
-		 * grok this feature flag).
-		 */
-		err = xenbus_printf(xbt, dev->nodename,
-				    "feature-rx-flip", "%d", 0);
-		if (err) {
-			message = "writing feature-rx-flip";
-			goto abort_transaction;
-		}
-
-		/* We support dynamic multicast-control. */
-		err = xenbus_printf(xbt, dev->nodename,
-				    "feature-multicast-control", "%d", 1);
-		if (err) {
-			message = "writing feature-multicast-control";
-			goto abort_transaction;
-		}
-
-		err = xenbus_printf(xbt, dev->nodename,
-				    "feature-dynamic-multicast-control",
-				    "%d", 1);
-		if (err) {
-			message = "writing feature-dynamic-multicast-control";
-			goto abort_transaction;
-		}
-
-		err = xenbus_transaction_end(xbt, 0);
-	} while (err == -EAGAIN);
-
-	if (err) {
-		xenbus_dev_fatal(dev, err, "completing transaction");
-		goto fail;
-	}
-
-	/*
-	 * Split event channels support, this is optional so it is not
-	 * put inside the above loop.
-	 */
-	err = xenbus_printf(XBT_NIL, dev->nodename,
-			    "feature-split-event-channels",
-			    "%u", separate_tx_rx_irq);
-	if (err)
-		pr_debug("Error writing feature-split-event-channels\n");
-
-	/* Multi-queue support: This is an optional feature. */
-	err = xenbus_printf(XBT_NIL, dev->nodename,
-			    "multi-queue-max-queues", "%u", xenvif_max_queues);
-	if (err)
-		pr_debug("Error writing multi-queue-max-queues\n");
-
-	err = xenbus_printf(XBT_NIL, dev->nodename,
-			    "feature-ctrl-ring",
-			    "%u", true);
-	if (err)
-		pr_debug("Error writing feature-ctrl-ring\n");
-
-	script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL);
-	if (IS_ERR(script)) {
-		err = PTR_ERR(script);
-		xenbus_dev_fatal(dev, err, "reading script");
-		goto fail;
-	}
-
-	be->hotplug_script = script;
-
-
-	/* This kicks hotplug scripts, so do it immediately. */
-	err = backend_create_xenvif(be);
-	if (err)
-		goto fail;
-
-	return 0;
-
-abort_transaction:
-	xenbus_transaction_end(xbt, 1);
-	xenbus_dev_fatal(dev, err, "%s", message);
-fail:
-	pr_debug("failed\n");
-	netback_remove(dev);
-	return err;
-}
-
-
 /*
  * Handle the creation of the hotplug script environment. We add the script
  * and vif variables to the environment, for the benefit of the vif-* hotplug
  * scripts.
  */
-static int netback_uevent(struct xenbus_device *xdev,
+static int netback_uevent(const struct xenbus_device *xdev,
 			  struct kobj_uevent_env *env)
 {
 	struct backend_info *be = dev_get_drvdata(&xdev->dev);
@@ -472,6 +241,7 @@ static int backend_create_xenvif(struct backend_info *be)
 		return err;
 	}
 	be->vif = vif;
+	vif->be = be;
 
 	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
 	return 0;
@@ -623,7 +393,25 @@ static void set_backend_state(struct backend_info *be,
 	}
 }
 
-/**
+static void read_xenbus_frontend_xdp(struct backend_info *be,
+				     struct xenbus_device *dev)
+{
+	struct xenvif *vif = be->vif;
+	u16 headroom;
+	int err;
+
+	err = xenbus_scanf(XBT_NIL, dev->otherend,
+			   "xdp-headroom", "%hu", &headroom);
+	if (err != 1) {
+		vif->xdp_headroom = 0;
+		return;
+	}
+	if (headroom > XEN_NETIF_MAX_XDP_HEADROOM)
+		headroom = XEN_NETIF_MAX_XDP_HEADROOM;
+	vif->xdp_headroom = headroom;
+}
+
+/*
 * Callback received when the frontend's state changes.
 */
 static void frontend_changed(struct xenbus_device *dev,
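Note: read_xenbus_frontend_xdp(), added above, is the backend half of the XDP headroom negotiation: it reads the frontend's "xdp-headroom" xenstore node with xenbus_scanf(), treats a missing or malformed node as "no headroom", and caps the request at XEN_NETIF_MAX_XDP_HEADROOM (its callers appear in the hunks below). The standalone sketch that follows, which is not part of the diff, only models that clamping policy; the 0x7FFF limit is an assumption standing in for the real constant from the Xen netif headers, and clamp_xdp_headroom() is a made-up name.

#include <stdint.h>
#include <stdio.h>

#define MAX_XDP_HEADROOM 0x7FFF /* stand-in for XEN_NETIF_MAX_XDP_HEADROOM */

/* scanf_rc mimics the return value of xenbus_scanf(): 1 on a successful
 * parse, anything else when the node is absent or malformed. */
static uint16_t clamp_xdp_headroom(int scanf_rc, uint16_t requested)
{
        if (scanf_rc != 1)
                return 0;                       /* no node -> no headroom */
        if (requested > MAX_XDP_HEADROOM)
                return MAX_XDP_HEADROOM;        /* cap oversized requests */
        return requested;
}

int main(void)
{
        printf("%u\n", clamp_xdp_headroom(1, 256));     /* honoured: 256 */
        printf("%u\n", clamp_xdp_headroom(1, 65535));   /* capped to 32767 */
        printf("%u\n", clamp_xdp_headroom(0, 256));     /* node missing: 0 */
        return 0;
}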
@@ -647,6 +435,11 @@ static void frontend_changed(struct xenbus_device *dev,
 		set_backend_state(be, XenbusStateConnected);
 		break;
 
+	case XenbusStateReconfiguring:
+		read_xenbus_frontend_xdp(be, dev);
+		xenbus_switch_state(dev, XenbusStateReconfigured);
+		break;
+
 	case XenbusStateClosing:
 		set_backend_state(be, XenbusStateClosing);
 		break;
@@ -655,7 +448,7 @@ static void frontend_changed(struct xenbus_device *dev,
 		set_backend_state(be, XenbusStateClosed);
 		if (xenbus_dev_is_online(dev))
 			break;
-		/* fall through if not online */
+		fallthrough;	/* if not online */
 	case XenbusStateUnknown:
 		set_backend_state(be, XenbusStateClosed);
 		device_unregister(&dev->dev);
@@ -764,12 +557,14 @@ static int xen_register_credit_watch(struct xenbus_device *dev,
 		return -ENOMEM;
 	snprintf(node, maxlen, "%s/rate", dev->nodename);
 	vif->credit_watch.node = node;
+	vif->credit_watch.will_handle = NULL;
 	vif->credit_watch.callback = xen_net_rate_changed;
 	err = register_xenbus_watch(&vif->credit_watch);
 	if (err) {
 		pr_err("Failed to set watcher %s\n", vif->credit_watch.node);
 		kfree(node);
 		vif->credit_watch.node = NULL;
+		vif->credit_watch.will_handle = NULL;
 		vif->credit_watch.callback = NULL;
 	}
 	return err;
@@ -816,6 +611,7 @@ static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev,
 	snprintf(node, maxlen, "%s/request-multicast-control",
 		 dev->otherend);
 	vif->mcast_ctrl_watch.node = node;
+	vif->mcast_ctrl_watch.will_handle = NULL;
 	vif->mcast_ctrl_watch.callback = xen_mcast_ctrl_changed;
 	err = register_xenbus_watch(&vif->mcast_ctrl_watch);
 	if (err) {
@@ -823,6 +619,7 @@ static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev,
 			vif->mcast_ctrl_watch.node);
 		kfree(node);
 		vif->mcast_ctrl_watch.node = NULL;
+		vif->mcast_ctrl_watch.will_handle = NULL;
 		vif->mcast_ctrl_watch.callback = NULL;
 	}
 	return err;
@@ -1026,7 +823,7 @@ static void connect(struct backend_info *be)
 	xenvif_carrier_on(be->vif);
 
 	unregister_hotplug_status_watch(be);
-	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
+	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL,
 				   hotplug_status_changed,
 				   "%s/%s", dev->nodename, "hotplug-status");
 	if (!err)
@@ -1068,13 +865,12 @@ static int connect_data_rings(struct backend_info *be,
 	 * queue-N.
 	 */
 	if (num_queues == 1) {
-		xspath = kzalloc(strlen(dev->otherend) + 1, GFP_KERNEL);
+		xspath = kstrdup(dev->otherend, GFP_KERNEL);
 		if (!xspath) {
 			xenbus_dev_fatal(dev, -ENOMEM,
 					 "reading ring references");
 			return -ENOMEM;
 		}
-		strcpy(xspath, dev->otherend);
 	} else {
 		xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
 		xspath = kzalloc(xspathsize, GFP_KERNEL);
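Note: the connect_data_rings() hunk above only swaps an open-coded kzalloc()+strcpy() for kstrdup(); the surrounding logic, partly outside this diff, still chooses between the frontend's own xenstore directory for a single queue and a per-queue "queue-N" sub-path when multiple queues are negotiated. The sketch below, which is not part of the diff, is a rough standalone illustration of that path selection; the exact "/queue-%u" format and the example path are assumptions, and only the single-queue duplication of dev->otherend is taken from the hunk.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build the xenstore path under which one queue's ring references live. */
static char *queue_xspath(const char *otherend, unsigned int num_queues,
                          unsigned int queue)
{
        size_t len;
        char *xspath;

        if (num_queues == 1)
                return strdup(otherend);        /* single queue: use the vif directory itself */

        /* multi-queue: append a "queue-N" component (assumed format) */
        len = strlen(otherend) + sizeof("/queue-4294967295");
        xspath = malloc(len);
        if (xspath)
                snprintf(xspath, len, "%s/queue-%u", otherend, queue);
        return xspath;
}

int main(void)
{
        char *p = queue_xspath("/local/domain/3/device/vif/0", 4, 2);

        printf("%s\n", p ? p : "(allocation failed)");  /* .../vif/0/queue-2 */
        free(p);
        return 0;
}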
@@ -1176,9 +972,188 @@ static int read_xenbus_vif_flags(struct backend_info *be)
 	vif->ipv6_csum = !!xenbus_read_unsigned(dev->otherend,
 						"feature-ipv6-csum-offload", 0);
 
+	read_xenbus_frontend_xdp(be, dev);
+
 	return 0;
 }
 
+static void netback_remove(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+	unregister_hotplug_status_watch(be);
+	xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+	if (be->vif) {
+		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+		backend_disconnect(be);
+		xenvif_free(be->vif);
+		be->vif = NULL;
+	}
+	kfree(be->hotplug_script);
+	kfree(be);
+	dev_set_drvdata(&dev->dev, NULL);
+}
+
+/*
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and switch to InitWait.
+ */
+static int netback_probe(struct xenbus_device *dev,
+			 const struct xenbus_device_id *id)
+{
+	const char *message;
+	struct xenbus_transaction xbt;
+	int err;
+	int sg;
+	const char *script;
+	struct backend_info *be = kzalloc(sizeof(*be), GFP_KERNEL);
+
+	if (!be) {
+		xenbus_dev_fatal(dev, -ENOMEM,
+				 "allocating backend structure");
+		return -ENOMEM;
+	}
+
+	be->dev = dev;
+	dev_set_drvdata(&dev->dev, be);
+
+	sg = 1;
+
+	do {
+		err = xenbus_transaction_start(&xbt);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "starting transaction");
+			goto fail;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
+		if (err) {
+			message = "writing feature-sg";
+			goto abort_transaction;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+				    "%d", sg);
+		if (err) {
+			message = "writing feature-gso-tcpv4";
+			goto abort_transaction;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
+				    "%d", sg);
+		if (err) {
+			message = "writing feature-gso-tcpv6";
+			goto abort_transaction;
+		}
+
+		/* We support partial checksum setup for IPv6 packets */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-ipv6-csum-offload",
+				    "%d", 1);
+		if (err) {
+			message = "writing feature-ipv6-csum-offload";
+			goto abort_transaction;
+		}
+
+		/* We support rx-copy path. */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-rx-copy", "%d", 1);
+		if (err) {
+			message = "writing feature-rx-copy";
+			goto abort_transaction;
+		}
+
+		/* we can adjust a headroom for netfront XDP processing */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-xdp-headroom", "%d",
+				    provides_xdp_headroom);
+		if (err) {
+			message = "writing feature-xdp-headroom";
+			goto abort_transaction;
+		}
+
+		/* We don't support rx-flip path (except old guests who
+		 * don't grok this feature flag).
+		 */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-rx-flip", "%d", 0);
+		if (err) {
+			message = "writing feature-rx-flip";
+			goto abort_transaction;
+		}
+
+		/* We support dynamic multicast-control. */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-multicast-control", "%d", 1);
+		if (err) {
+			message = "writing feature-multicast-control";
+			goto abort_transaction;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-dynamic-multicast-control",
+				    "%d", 1);
+		if (err) {
+			message = "writing feature-dynamic-multicast-control";
+			goto abort_transaction;
+		}
+
+		err = xenbus_transaction_end(xbt, 0);
+	} while (err == -EAGAIN);
+
+	if (err) {
+		xenbus_dev_fatal(dev, err, "completing transaction");
+		goto fail;
+	}
+
+	/* Split event channels support, this is optional so it is not
+	 * put inside the above loop.
+	 */
+	err = xenbus_printf(XBT_NIL, dev->nodename,
+			    "feature-split-event-channels",
+			    "%u", separate_tx_rx_irq);
+	if (err)
+		pr_debug("Error writing feature-split-event-channels\n");
+
+	/* Multi-queue support: This is an optional feature. */
+	err = xenbus_printf(XBT_NIL, dev->nodename,
+			    "multi-queue-max-queues", "%u", xenvif_max_queues);
+	if (err)
+		pr_debug("Error writing multi-queue-max-queues\n");
+
+	err = xenbus_printf(XBT_NIL, dev->nodename,
+			    "feature-ctrl-ring",
+			    "%u", true);
+	if (err)
+		pr_debug("Error writing feature-ctrl-ring\n");
+
+	backend_switch_state(be, XenbusStateInitWait);
+
+	script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL);
+	if (IS_ERR(script)) {
+		err = PTR_ERR(script);
+		xenbus_dev_fatal(dev, err, "reading script");
+		goto fail;
+	}
+
+	be->hotplug_script = script;
+
+	/* This kicks hotplug scripts, so do it immediately. */
+	err = backend_create_xenvif(be);
+	if (err)
+		goto fail;
+
+	return 0;
+
+abort_transaction:
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(dev, err, "%s", message);
+fail:
+	pr_debug("failed\n");
+	netback_remove(dev);
+	return err;
+}
+
 static const struct xenbus_device_id netback_ids[] = {
 	{ "vif" },
 	{ "" }
@@ -1190,6 +1165,7 @@ static struct xenbus_driver netback_driver = {
 	.remove = netback_remove,
 	.uevent = netback_uevent,
 	.otherend_changed = frontend_changed,
+	.allow_rebind = true,
 };
 
 int xenvif_xenbus_init(void)
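Note: the relocated netback_probe() keeps the original feature-advertisement pattern: every node the frontend must see as a consistent set is written inside one xenstore transaction, and the whole batch is replayed while xenbus_transaction_end() reports -EAGAIN because another writer raced with it. Below is a toy standalone model of that retry loop, not part of the diff; xs_begin(), xs_write() and xs_end() are made-up stand-ins, not xenbus or libxenstore APIs.

#include <errno.h>
#include <stdio.h>

static int attempts;

static int xs_begin(void) { return 0; }

static int xs_write(const char *node, int val)
{
        printf("  write %s = %d\n", node, val);
        return 0;
}

/* Pretend the first transaction collides with another xenstore writer. */
static int xs_end(void)
{
        return ++attempts == 1 ? -EAGAIN : 0;
}

int main(void)
{
        int err;

        do {
                printf("transaction attempt %d\n", attempts + 1);
                err = xs_begin();
                if (err)
                        break;
                err = xs_write("feature-sg", 1);
                if (!err)
                        err = xs_write("feature-rx-copy", 1);
                if (!err)
                        err = xs_write("feature-xdp-headroom", 1);
                if (!err)
                        err = xs_end(); /* -EAGAIN on the first pass -> retry */
        } while (err == -EAGAIN);

        return err ? 1 : 0;
}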
