diff options
Diffstat (limited to 'drivers/net/xen-netfront.c')
-rw-r--r-- | drivers/net/xen-netfront.c | 496 |
1 files changed, 320 insertions, 176 deletions
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 30899901aef5..36808bf25677 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -29,6 +29,8 @@ * IN THE SOFTWARE. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -36,13 +38,14 @@ #include <linux/skbuff.h> #include <linux/ethtool.h> #include <linux/if_ether.h> -#include <linux/tcp.h> +#include <net/tcp.h> #include <linux/udp.h> #include <linux/moduleparam.h> #include <linux/mm.h> #include <linux/slab.h> #include <net/ip.h> +#include <asm/xen/page.h> #include <xen/xen.h> #include <xen/xenbus.h> #include <xen/events.h> @@ -57,8 +60,7 @@ static const struct ethtool_ops xennet_ethtool_ops; struct netfront_cb { - struct page *page; - unsigned offset; + int pull_to; }; #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb)) @@ -85,7 +87,15 @@ struct netfront_info { struct napi_struct napi; - unsigned int evtchn; + /* Split event channels support, tx_* == rx_* when using + * single event channel. + */ + unsigned int tx_evtchn, rx_evtchn; + unsigned int tx_irq, rx_irq; + /* Only used when split event channels support is enabled */ + char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */ + char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */ + struct xenbus_device *xbdev; spinlock_t tx_lock; @@ -276,8 +286,7 @@ no_skb: break; } - __skb_fill_page_desc(skb, 0, page, 0, 0); - skb_shinfo(skb)->nr_frags = 1; + skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); __skb_queue_tail(&np->rx_batch, skb); } @@ -330,7 +339,7 @@ no_skb: push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); if (notify) - notify_remote_via_irq(np->netdev->irq); + notify_remote_via_irq(np->rx_irq); } static int xennet_open(struct net_device *dev) @@ -377,9 +386,8 @@ static void xennet_tx_buf_gc(struct net_device *dev) skb = np->tx_skbs[id].skb; if (unlikely(gnttab_query_foreign_access( np->grant_tx_ref[id]) != 0)) { - printk(KERN_ALERT "xennet_tx_buf_gc: warning " - "-- grant still in use by backend " - "domain.\n"); + pr_alert("%s: warning -- grant still in use by backend domain\n", + __func__); BUG(); } gnttab_end_foreign_access_ref( @@ -452,58 +460,123 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, /* Grant backend access to each skb fragment page. */ for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; + struct page *page = skb_frag_page(frag); - tx->flags |= XEN_NETTXF_more_data; + len = skb_frag_size(frag); + offset = frag->page_offset; - id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); - np->tx_skbs[id].skb = skb_get(skb); - tx = RING_GET_REQUEST(&np->tx, prod++); - tx->id = id; - ref = gnttab_claim_grant_reference(&np->gref_tx_head); - BUG_ON((signed short)ref < 0); + /* Data must not cross a page boundary. */ + BUG_ON(len + offset > PAGE_SIZE<<compound_order(page)); - mfn = pfn_to_mfn(page_to_pfn(skb_frag_page(frag))); - gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, - mfn, GNTMAP_readonly); + /* Skip unused frames from start of page */ + page += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; - tx->gref = np->grant_tx_ref[id] = ref; - tx->offset = frag->page_offset; - tx->size = skb_frag_size(frag); - tx->flags = 0; + while (len > 0) { + unsigned long bytes; + + BUG_ON(offset >= PAGE_SIZE); + + bytes = PAGE_SIZE - offset; + if (bytes > len) + bytes = len; + + tx->flags |= XEN_NETTXF_more_data; + + id = get_id_from_freelist(&np->tx_skb_freelist, + np->tx_skbs); + np->tx_skbs[id].skb = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = pfn_to_mfn(page_to_pfn(page)); + gnttab_grant_foreign_access_ref(ref, + np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = bytes; + tx->flags = 0; + + offset += bytes; + len -= bytes; + + /* Next frame */ + if (offset == PAGE_SIZE && len) { + BUG_ON(!PageCompound(page)); + page++; + offset = 0; + } + } } np->tx.req_prod_pvt = prod; } +/* + * Count how many ring slots are required to send the frags of this + * skb. Each frag might be a compound page. + */ +static int xennet_count_skb_frag_slots(struct sk_buff *skb) +{ + int i, frags = skb_shinfo(skb)->nr_frags; + int pages = 0; + + for (i = 0; i < frags; i++) { + skb_frag_t *frag = skb_shinfo(skb)->frags + i; + unsigned long size = skb_frag_size(frag); + unsigned long offset = frag->page_offset; + + /* Skip unused frames from start of page */ + offset &= ~PAGE_MASK; + + pages += PFN_UP(offset + size); + } + + return pages; +} + static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned short id; struct netfront_info *np = netdev_priv(dev); struct netfront_stats *stats = this_cpu_ptr(np->stats); struct xen_netif_tx_request *tx; - struct xen_netif_extra_info *extra; char *data = skb->data; RING_IDX i; grant_ref_t ref; unsigned long mfn; int notify; - int frags = skb_shinfo(skb)->nr_frags; + int slots; unsigned int offset = offset_in_page(data); unsigned int len = skb_headlen(skb); unsigned long flags; - frags += DIV_ROUND_UP(offset + len, PAGE_SIZE); - if (unlikely(frags > MAX_SKB_FRAGS + 1)) { - printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", - frags); - dump_stack(); + /* If skb->len is too big for wire format, drop skb and alert + * user about misconfiguration. + */ + if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) { + net_alert_ratelimited( + "xennet: skb->len = %u, too big for wire format\n", + skb->len); + goto drop; + } + + slots = DIV_ROUND_UP(offset + len, PAGE_SIZE) + + xennet_count_skb_frag_slots(skb); + if (unlikely(slots > MAX_SKB_FRAGS + 1)) { + net_alert_ratelimited( + "xennet: skb rides the rocket: %d slots\n", slots); goto drop; } spin_lock_irqsave(&np->tx_lock, flags); if (unlikely(!netif_carrier_ok(dev) || - (frags > 1 && !xennet_can_sg(dev)) || + (slots > 1 && !xennet_can_sg(dev)) || netif_needs_gso(skb, netif_skb_features(skb)))) { spin_unlock_irqrestore(&np->tx_lock, flags); goto drop; @@ -525,7 +598,6 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; - extra = NULL; tx->flags = 0; if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -541,10 +613,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) gso = (struct xen_netif_extra_info *) RING_GET_REQUEST(&np->tx, ++i); - if (extra) - extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; - else - tx->flags |= XEN_NETTXF_extra_info; + tx->flags |= XEN_NETTXF_extra_info; gso->u.gso.size = skb_shinfo(skb)->gso_size; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; @@ -553,7 +622,6 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; - extra = gso; } np->tx.req_prod_pvt = i + 1; @@ -563,7 +631,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); if (notify) - notify_remote_via_irq(np->netdev->irq); + notify_remote_via_irq(np->tx_irq); u64_stats_update_begin(&stats->syncp); stats->tx_bytes += skb->len; @@ -662,7 +730,7 @@ static int xennet_get_responses(struct netfront_info *np, struct sk_buff *skb = xennet_get_rx_skb(np, cons); grant_ref_t ref = xennet_get_rx_ref(np, cons); int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); - int frags = 1; + int slots = 1; int err = 0; unsigned long ret; @@ -685,7 +753,7 @@ static int xennet_get_responses(struct netfront_info *np, /* * This definitely indicates a bug, either in this driver or in * the backend driver. In future this should flag the bad - * situation to the system controller to reboot the backed. + * situation to the system controller to reboot the backend. */ if (ref == GRANT_INVALID_REF) { if (net_ratelimit()) @@ -706,27 +774,27 @@ next: if (!(rx->flags & XEN_NETRXF_more_data)) break; - if (cons + frags == rp) { + if (cons + slots == rp) { if (net_ratelimit()) - dev_warn(dev, "Need more frags\n"); + dev_warn(dev, "Need more slots\n"); err = -ENOENT; break; } - rx = RING_GET_RESPONSE(&np->rx, cons + frags); - skb = xennet_get_rx_skb(np, cons + frags); - ref = xennet_get_rx_ref(np, cons + frags); - frags++; + rx = RING_GET_RESPONSE(&np->rx, cons + slots); + skb = xennet_get_rx_skb(np, cons + slots); + ref = xennet_get_rx_ref(np, cons + slots); + slots++; } - if (unlikely(frags > max)) { + if (unlikely(slots > max)) { if (net_ratelimit()) - dev_warn(dev, "Too many frags\n"); + dev_warn(dev, "Too many slots\n"); err = -E2BIG; } if (unlikely(err)) - np->rx.rsp_cons = cons + frags; + np->rx.rsp_cons = cons + slots; return err; } @@ -736,14 +804,14 @@ static int xennet_set_skb_gso(struct sk_buff *skb, { if (!gso->u.gso.size) { if (net_ratelimit()) - printk(KERN_WARNING "GSO size must not be zero.\n"); + pr_warn("GSO size must not be zero\n"); return -EINVAL; } /* Currently only TCPv4 S.O. is supported. */ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { if (net_ratelimit()) - printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type); + pr_warn("Bad GSO type %d\n", gso->u.gso.type); return -EINVAL; } @@ -762,7 +830,6 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np, struct sk_buff_head *list) { struct skb_shared_info *shinfo = skb_shinfo(skb); - int nr_frags = shinfo->nr_frags; RING_IDX cons = np->rx.rsp_cons; struct sk_buff *nskb; @@ -771,26 +838,27 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np, RING_GET_RESPONSE(&np->rx, ++cons); skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; - __skb_fill_page_desc(skb, nr_frags, - skb_frag_page(nfrag), - rx->offset, rx->status); + if (shinfo->nr_frags == MAX_SKB_FRAGS) { + unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; + + BUG_ON(pull_to <= skb_headlen(skb)); + __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); + } + BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS); - skb->data_len += rx->status; + skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag), + rx->offset, rx->status, PAGE_SIZE); skb_shinfo(nskb)->nr_frags = 0; kfree_skb(nskb); - - nr_frags++; } - shinfo->nr_frags = nr_frags; return cons; } static int checksum_setup(struct net_device *dev, struct sk_buff *skb) { struct iphdr *iph; - unsigned char *th; int err = -EPROTO; int recalculate_partial_csum = 0; @@ -815,27 +883,27 @@ static int checksum_setup(struct net_device *dev, struct sk_buff *skb) goto out; iph = (void *)skb->data; - th = skb->data + 4 * iph->ihl; - if (th >= skb_tail_pointer(skb)) - goto out; - skb->csum_start = th - skb->head; switch (iph->protocol) { case IPPROTO_TCP: - skb->csum_offset = offsetof(struct tcphdr, check); + if (!skb_partial_csum_set(skb, 4 * iph->ihl, + offsetof(struct tcphdr, check))) + goto out; if (recalculate_partial_csum) { - struct tcphdr *tcph = (struct tcphdr *)th; + struct tcphdr *tcph = tcp_hdr(skb); tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - iph->ihl*4, IPPROTO_TCP, 0); } break; case IPPROTO_UDP: - skb->csum_offset = offsetof(struct udphdr, check); + if (!skb_partial_csum_set(skb, 4 * iph->ihl, + offsetof(struct udphdr, check))) + goto out; if (recalculate_partial_csum) { - struct udphdr *udph = (struct udphdr *)th; + struct udphdr *udph = udp_hdr(skb); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - iph->ihl*4, IPPROTO_UDP, 0); @@ -843,15 +911,11 @@ static int checksum_setup(struct net_device *dev, struct sk_buff *skb) break; default: if (net_ratelimit()) - printk(KERN_ERR "Attempting to checksum a non-" - "TCP/UDP packet, dropping a protocol" - " %d packet", iph->protocol); + pr_err("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", + iph->protocol); goto out; } - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) - goto out; - err = 0; out: @@ -867,15 +931,10 @@ static int handle_incoming_queue(struct net_device *dev, struct sk_buff *skb; while ((skb = __skb_dequeue(rxq)) != NULL) { - struct page *page = NETFRONT_SKB_CB(skb)->page; - void *vaddr = page_address(page); - unsigned offset = NETFRONT_SKB_CB(skb)->offset; - - memcpy(skb->data, vaddr + offset, - skb_headlen(skb)); + int pull_to = NETFRONT_SKB_CB(skb)->pull_to; - if (page != skb_frag_page(&skb_shinfo(skb)->frags[0])) - __free_page(page); + if (pull_to > skb_headlen(skb)) + __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); /* Ethernet work: Delayed to here as it peeks the header. */ skb->protocol = eth_type_trans(skb, dev); @@ -913,7 +972,6 @@ static int xennet_poll(struct napi_struct *napi, int budget) struct sk_buff_head errq; struct sk_buff_head tmpq; unsigned long flags; - unsigned int len; int err; spin_lock(&np->rx_lock); @@ -955,53 +1013,17 @@ err: } } - NETFRONT_SKB_CB(skb)->page = - skb_frag_page(&skb_shinfo(skb)->frags[0]); - NETFRONT_SKB_CB(skb)->offset = rx->offset; + NETFRONT_SKB_CB(skb)->pull_to = rx->status; + if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD) + NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD; - len = rx->status; - if (len > RX_COPY_THRESHOLD) - len = RX_COPY_THRESHOLD; - skb_put(skb, len); - - if (rx->status > len) { - skb_shinfo(skb)->frags[0].page_offset = - rx->offset + len; - skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status - len); - skb->data_len = rx->status - len; - } else { - __skb_fill_page_desc(skb, 0, NULL, 0, 0); - skb_shinfo(skb)->nr_frags = 0; - } + skb_shinfo(skb)->frags[0].page_offset = rx->offset; + skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status); + skb->data_len = rx->status; + skb->len += rx->status; i = xennet_fill_frags(np, skb, &tmpq); - /* - * Truesize approximates the size of true data plus - * any supervisor overheads. Adding hypervisor - * overheads has been shown to significantly reduce - * achievable bandwidth with the default receive - * buffer size. It is therefore not wise to account - * for it here. - * - * After alloc_skb(RX_COPY_THRESHOLD), truesize is set - * to RX_COPY_THRESHOLD + the supervisor - * overheads. Here, we add the size of the data pulled - * in xennet_fill_frags(). - * - * We also adjust for any unused space in the main - * data area by subtracting (RX_COPY_THRESHOLD - - * len). This is especially important with drivers - * which split incoming packets into header and data, - * using only 66 bytes of the main data area (see the - * e1000 driver for example.) On such systems, - * without this last adjustement, our achievable - * receive throughout using the standard receive - * buffer size was cut by 25%(!!!). - */ - skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); - skb->len += skb->data_len; - if (rx->flags & XEN_NETRXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (rx->flags & XEN_NETRXF_data_validated) @@ -1045,7 +1067,8 @@ err: static int xennet_change_mtu(struct net_device *dev, int mtu) { - int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; + int max = xennet_can_sg(dev) ? + XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN; if (mtu > max) return -EINVAL; @@ -1230,23 +1253,35 @@ static int xennet_set_features(struct net_device *dev, return 0; } -static irqreturn_t xennet_interrupt(int irq, void *dev_id) +static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) { - struct net_device *dev = dev_id; - struct netfront_info *np = netdev_priv(dev); + struct netfront_info *np = dev_id; + struct net_device *dev = np->netdev; unsigned long flags; spin_lock_irqsave(&np->tx_lock, flags); + xennet_tx_buf_gc(dev); + spin_unlock_irqrestore(&np->tx_lock, flags); - if (likely(netif_carrier_ok(dev))) { - xennet_tx_buf_gc(dev); - /* Under tx_lock: protects access to rx shared-ring indexes. */ - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + return IRQ_HANDLED; +} + +static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +{ + struct netfront_info *np = dev_id; + struct net_device *dev = np->netdev; + + if (likely(netif_carrier_ok(dev) && + RING_HAS_UNCONSUMED_RESPONSES(&np->rx))) napi_schedule(&np->napi); - } - spin_unlock_irqrestore(&np->tx_lock, flags); + return IRQ_HANDLED; +} +static irqreturn_t xennet_interrupt(int irq, void *dev_id) +{ + xennet_tx_interrupt(irq, dev_id); + xennet_rx_interrupt(irq, dev_id); return IRQ_HANDLED; } @@ -1273,7 +1308,7 @@ static const struct net_device_ops xennet_netdev_ops = { #endif }; -static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev) +static struct net_device *xennet_create_dev(struct xenbus_device *dev) { int i, err; struct net_device *netdev; @@ -1319,14 +1354,14 @@ static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev /* A grant for every tx ring slot */ if (gnttab_alloc_grant_references(TX_MAX_TARGET, &np->gref_tx_head) < 0) { - printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); + pr_alert("can't alloc tx grant refs\n"); err = -ENOMEM; goto exit_free_stats; } /* A grant for every rx ring slot */ if (gnttab_alloc_grant_references(RX_MAX_TARGET, &np->gref_rx_head) < 0) { - printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); + pr_alert("can't alloc rx grant refs\n"); err = -ENOMEM; goto exit_free_tx; } @@ -1349,6 +1384,8 @@ static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); SET_NETDEV_DEV(netdev, &dev->dev); + netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); + np->netdev = netdev; netif_carrier_off(netdev); @@ -1369,8 +1406,8 @@ static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev * structures and the ring buffers for communication with the backend, and * inform the backend of the appropriate details for those. */ -static int __devinit netfront_probe(struct xenbus_device *dev, - const struct xenbus_device_id *id) +static int netfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) { int err; struct net_device *netdev; @@ -1388,16 +1425,14 @@ static int __devinit netfront_probe(struct xenbus_device *dev, err = register_netdev(info->netdev); if (err) { - printk(KERN_WARNING "%s: register_netdev err=%d\n", - __func__, err); + pr_warn("%s: register_netdev err=%d\n", __func__, err); goto fail; } err = xennet_sysfs_addif(info->netdev); if (err) { unregister_netdev(info->netdev); - printk(KERN_WARNING "%s: add sysfs failed err=%d\n", - __func__, err); + pr_warn("%s: add sysfs failed err=%d\n", __func__, err); goto fail; } @@ -1425,9 +1460,14 @@ static void xennet_disconnect_backend(struct netfront_info *info) spin_unlock_irq(&info->tx_lock); spin_unlock_bh(&info->rx_lock); - if (info->netdev->irq) - unbind_from_irqhandler(info->netdev->irq, info->netdev); - info->evtchn = info->netdev->irq = 0; + if (info->tx_irq && (info->tx_irq == info->rx_irq)) + unbind_from_irqhandler(info->tx_irq, info); + if (info->tx_irq && (info->tx_irq != info->rx_irq)) { + unbind_from_irqhandler(info->tx_irq, info); + unbind_from_irqhandler(info->rx_irq, info); + } + info->tx_evtchn = info->rx_evtchn = 0; + info->tx_irq = info->rx_irq = 0; /* End access and free the pages */ xennet_end_access(info->tx_ring_ref, info->tx.sring); @@ -1477,12 +1517,82 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) return 0; } +static int setup_netfront_single(struct netfront_info *info) +{ + int err; + + err = xenbus_alloc_evtchn(info->xbdev, &info->tx_evtchn); + if (err < 0) + goto fail; + + err = bind_evtchn_to_irqhandler(info->tx_evtchn, + xennet_interrupt, + 0, info->netdev->name, info); + if (err < 0) + goto bind_fail; + info->rx_evtchn = info->tx_evtchn; + info->rx_irq = info->tx_irq = err; + + return 0; + +bind_fail: + xenbus_free_evtchn(info->xbdev, info->tx_evtchn); + info->tx_evtchn = 0; +fail: + return err; +} + +static int setup_netfront_split(struct netfront_info *info) +{ + int err; + + err = xenbus_alloc_evtchn(info->xbdev, &info->tx_evtchn); + if (err < 0) + goto fail; + err = xenbus_alloc_evtchn(info->xbdev, &info->rx_evtchn); + if (err < 0) + goto alloc_rx_evtchn_fail; + + snprintf(info->tx_irq_name, sizeof(info->tx_irq_name), + "%s-tx", info->netdev->name); + err = bind_evtchn_to_irqhandler(info->tx_evtchn, + xennet_tx_interrupt, + 0, info->tx_irq_name, info); + if (err < 0) + goto bind_tx_fail; + info->tx_irq = err; + + snprintf(info->rx_irq_name, sizeof(info->rx_irq_name), + "%s-rx", info->netdev->name); + err = bind_evtchn_to_irqhandler(info->rx_evtchn, + xennet_rx_interrupt, + 0, info->rx_irq_name, info); + if (err < 0) + goto bind_rx_fail; + info->rx_irq = err; + + return 0; + +bind_rx_fail: + unbind_from_irqhandler(info->tx_irq, info); + info->tx_irq = 0; +bind_tx_fail: + xenbus_free_evtchn(info->xbdev, info->rx_evtchn); + info->rx_evtchn = 0; +alloc_rx_evtchn_fail: + xenbus_free_evtchn(info->xbdev, info->tx_evtchn); + info->tx_evtchn = 0; +fail: + return err; +} + static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) { struct xen_netif_tx_sring *txs; struct xen_netif_rx_sring *rxs; int err; struct net_device *netdev = info->netdev; + unsigned int feature_split_evtchn; info->tx_ring_ref = GRANT_INVALID_REF; info->rx_ring_ref = GRANT_INVALID_REF; @@ -1490,6 +1600,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) info->tx.sring = NULL; netdev->irq = 0; + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-split-event-channels", "%u", + &feature_split_evtchn); + if (err < 0) + feature_split_evtchn = 0; + err = xen_net_read_mac(dev, netdev->dev_addr); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); @@ -1506,40 +1622,50 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); err = xenbus_grant_ring(dev, virt_to_mfn(txs)); - if (err < 0) { - free_page((unsigned long)txs); - goto fail; - } + if (err < 0) + goto grant_tx_ring_fail; info->tx_ring_ref = err; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating rx ring page"); - goto fail; + goto alloc_rx_ring_fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); - if (err < 0) { - free_page((unsigned long)rxs); - goto fail; - } + if (err < 0) + goto grant_rx_ring_fail; info->rx_ring_ref = err; - err = xenbus_alloc_evtchn(dev, &info->evtchn); + if (feature_split_evtchn) + err = setup_netfront_split(info); + /* setup single event channel if + * a) feature-split-event-channels == 0 + * b) feature-split-event-channels == 1 but failed to setup + */ + if (!feature_split_evtchn || (feature_split_evtchn && err)) + err = setup_netfront_single(info); + if (err) - goto fail; + goto alloc_evtchn_fail; - err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt, - 0, netdev->name, netdev); - if (err < 0) - goto fail; - netdev->irq = err; return 0; - fail: + /* If we fail to setup netfront, it is safe to just revoke access to + * granted pages because backend is not accessing it at this point. + */ +alloc_evtchn_fail: + gnttab_end_foreign_access_ref(info->rx_ring_ref, 0); +grant_rx_ring_fail: + free_page((unsigned long)rxs); +alloc_rx_ring_fail: + gnttab_end_foreign_access_ref(info->tx_ring_ref, 0); +grant_tx_ring_fail: + free_page((unsigned long)txs); +fail: return err; } @@ -1575,11 +1701,27 @@ again: message = "writing rx ring-ref"; goto abort_transaction; } - err = xenbus_printf(xbt, dev->nodename, - "event-channel", "%u", info->evtchn); - if (err) { - message = "writing event-channel"; - goto abort_transaction; + + if (info->tx_evtchn == info->rx_evtchn) { + err = xenbus_printf(xbt, dev->nodename, + "event-channel", "%u", info->tx_evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + } else { + err = xenbus_printf(xbt, dev->nodename, + "event-channel-tx", "%u", info->tx_evtchn); + if (err) { + message = "writing event-channel-tx"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "event-channel-rx", "%u", info->rx_evtchn); + if (err) { + message = "writing event-channel-rx"; + goto abort_transaction; + } } err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", @@ -1692,7 +1834,9 @@ static int xennet_connect(struct net_device *dev) * packets. */ netif_carrier_on(np->netdev); - notify_remote_via_irq(np->netdev->irq); + notify_remote_via_irq(np->tx_irq); + if (np->tx_irq != np->rx_irq) + notify_remote_via_irq(np->rx_irq); xennet_tx_buf_gc(dev); xennet_alloc_rx_buffers(dev); @@ -1731,7 +1875,7 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateConnected: - netif_notify_peers(netdev); + netdev_notify_peers(netdev); break; case XenbusStateClosing: @@ -1929,7 +2073,7 @@ static const struct xenbus_device_id netfront_ids[] = { }; -static int __devexit xennet_remove(struct xenbus_device *dev) +static int xennet_remove(struct xenbus_device *dev) { struct netfront_info *info = dev_get_drvdata(&dev->dev); @@ -1952,7 +2096,7 @@ static int __devexit xennet_remove(struct xenbus_device *dev) static DEFINE_XENBUS_DRIVER(netfront, , .probe = netfront_probe, - .remove = __devexit_p(xennet_remove), + .remove = xennet_remove, .resume = netfront_resume, .otherend_changed = netback_changed, ); @@ -1965,7 +2109,7 @@ static int __init netif_init(void) if (xen_hvm_domain() && !xen_platform_pci_unplug) return -ENODEV; - printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); + pr_info("Initialising Xen virtual ethernet driver\n"); return xenbus_register_frontend(&netfront_driver); } |