Diffstat (limited to 'drivers/net/ethernet/cisco/enic/enic_main.c')
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_main.c | 1267
1 file changed, 610 insertions(+), 657 deletions(-)
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index d24ee1ad3be1..6bc8dfdb3d4b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -46,6 +46,7 @@ #include <linux/crash_dump.h> #include <net/busy_poll.h> #include <net/vxlan.h> +#include <net/netdev_queues.h> #include "cq_enet_desc.h" #include "vnic_dev.h" @@ -57,18 +58,15 @@ #include "enic_dev.h" #include "enic_pp.h" #include "enic_clsf.h" +#include "enic_rq.h" +#include "enic_wq.h" #define ENIC_NOTIFY_TIMER_PERIOD (2 * HZ) -#define WQ_ENET_MAX_DESC_LEN (1 << WQ_ENET_LEN_BITS) -#define MAX_TSO (1 << 16) -#define ENIC_DESC_MAX_SPLITS (MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1) #define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN 0x0044 /* enet dynamic vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ -#define RX_COPYBREAK_DEFAULT 256 - /* Supported devices */ static const struct pci_device_id enic_id_table[] = { { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) }, @@ -80,7 +78,6 @@ static const struct pci_device_id enic_id_table[] = { MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("Scott Feldman <scofeldm@cisco.com>"); MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, enic_id_table); #define ENIC_LARGE_PKT_THRESHOLD 1000 @@ -109,7 +106,7 @@ static struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = { static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = { {0, 0}, /* 0 - 4 Gbps */ {0, 3}, /* 4 - 10 Gbps */ - {3, 6}, /* 10 - 40 Gbps */ + {3, 6}, /* 10+ Gbps */ }; static void enic_init_affinity_hint(struct enic *enic) @@ -119,7 +116,7 @@ static void enic_init_affinity_hint(struct enic *enic) for (i = 0; i < enic->intr_count; i++) { if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) || - (enic->msix[i].affinity_mask && + (cpumask_available(enic->msix[i].affinity_mask) && !cpumask_empty(enic->msix[i].affinity_mask))) continue; if (zalloc_cpumask_var(&enic->msix[i].affinity_mask, @@ -148,20 +145,20 @@ static void enic_set_affinity_hint(struct enic *enic) for (i = 0; i < enic->intr_count; i++) { if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) || - !enic->msix[i].affinity_mask || + !cpumask_available(enic->msix[i].affinity_mask) || cpumask_empty(enic->msix[i].affinity_mask)) continue; - err = irq_set_affinity_hint(enic->msix_entry[i].vector, - enic->msix[i].affinity_mask); + err = irq_update_affinity_hint(enic->msix_entry[i].vector, + enic->msix[i].affinity_mask); if (err) - netdev_warn(enic->netdev, "irq_set_affinity_hint failed, err %d\n", + netdev_warn(enic->netdev, "irq_update_affinity_hint failed, err %d\n", err); } for (i = 0; i < enic->wq_count; i++) { int wq_intr = enic_msix_wq_intr(enic, i); - if (enic->msix[wq_intr].affinity_mask && + if (cpumask_available(enic->msix[wq_intr].affinity_mask) && !cpumask_empty(enic->msix[wq_intr].affinity_mask)) netif_set_xps_queue(enic->netdev, enic->msix[wq_intr].affinity_mask, @@ -174,40 +171,21 @@ static void enic_unset_affinity_hint(struct enic *enic) int i; for (i = 0; i < enic->intr_count; i++) - irq_set_affinity_hint(enic->msix_entry[i].vector, NULL); + irq_update_affinity_hint(enic->msix_entry[i].vector, NULL); } -static void enic_udp_tunnel_add(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int enic_udp_tunnel_set_port(struct net_device *netdev, + unsigned int table, unsigned int entry, + struct udp_tunnel_info 
*ti) { struct enic *enic = netdev_priv(netdev); - __be16 port = ti->port; int err; spin_lock_bh(&enic->devcmd_lock); - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) { - netdev_info(netdev, "udp_tnl: only vxlan tunnel offload supported"); - goto error; - } - - if (ti->sa_family != AF_INET) { - netdev_info(netdev, "vxlan: only IPv4 offload supported"); - goto error; - } - - if (enic->vxlan.vxlan_udp_port_number) { - if (ntohs(port) == enic->vxlan.vxlan_udp_port_number) - netdev_warn(netdev, "vxlan: udp port already offloaded"); - else - netdev_info(netdev, "vxlan: offload supported for only one UDP port"); - - goto error; - } - err = vnic_dev_overlay_offload_cfg(enic->vdev, OVERLAY_CFG_VXLAN_PORT_UPDATE, - ntohs(port)); + ntohs(ti->port)); if (err) goto error; @@ -216,53 +194,50 @@ static void enic_udp_tunnel_add(struct net_device *netdev, if (err) goto error; - enic->vxlan.vxlan_udp_port_number = ntohs(port); - - netdev_info(netdev, "vxlan fw-vers-%d: offload enabled for udp port: %d, sa_family: %d ", - (int)enic->vxlan.patch_level, ntohs(port), ti->sa_family); - - goto unlock; - + enic->vxlan.vxlan_udp_port_number = ntohs(ti->port); error: - netdev_info(netdev, "failed to offload udp port: %d, sa_family: %d, type: %d", - ntohs(port), ti->sa_family, ti->type); -unlock: spin_unlock_bh(&enic->devcmd_lock); + + return err; } -static void enic_udp_tunnel_del(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int enic_udp_tunnel_unset_port(struct net_device *netdev, + unsigned int table, unsigned int entry, + struct udp_tunnel_info *ti) { struct enic *enic = netdev_priv(netdev); int err; spin_lock_bh(&enic->devcmd_lock); - if ((ti->sa_family != AF_INET) || - ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number)) || - (ti->type != UDP_TUNNEL_TYPE_VXLAN)) { - netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded", - ntohs(ti->port), ti->sa_family, ti->type); - goto unlock; - } - err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_DISABLE); - if (err) { - netdev_err(netdev, "vxlan: del offload udp port: %d failed", - ntohs(ti->port)); + if (err) goto unlock; - } enic->vxlan.vxlan_udp_port_number = 0; - netdev_info(netdev, "vxlan: del offload udp port %d, family %d\n", - ntohs(ti->port), ti->sa_family); - unlock: spin_unlock_bh(&enic->devcmd_lock); + + return err; } +static const struct udp_tunnel_nic_info enic_udp_tunnels = { + .set_port = enic_udp_tunnel_set_port, + .unset_port = enic_udp_tunnel_unset_port, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + }, +}, enic_udp_tunnels_v4 = { + .set_port = enic_udp_tunnel_set_port, + .unset_port = enic_udp_tunnel_unset_port, + .flags = UDP_TUNNEL_NIC_INFO_IPV4_ONLY, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + }, +}; + static netdev_features_t enic_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -271,22 +246,37 @@ static netdev_features_t enic_features_check(struct sk_buff *skb, struct enic *enic = netdev_priv(dev); struct udphdr *udph; u16 port = 0; - u16 proto; + u8 proto; if (!skb->encapsulation) return features; features = vxlan_features_check(skb, features); - /* hardware only supports IPv4 vxlan tunnel */ - if (vlan_get_protocol(skb) != htons(ETH_P_IP)) + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IPV6): + if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)) + goto out; + proto = ipv6_hdr(skb)->nexthdr; + break; + case htons(ETH_P_IP): + proto = ip_hdr(skb)->protocol; + 
break; + default: goto out; + } - /* hardware does not support offload of ipv6 inner pkt */ - if (eth->h_proto != ntohs(ETH_P_IP)) + switch (eth->h_proto) { + case ntohs(ETH_P_IPV6): + if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6)) + goto out; + fallthrough; + case ntohs(ETH_P_IP): + break; + default: goto out; + } - proto = ip_hdr(skb)->protocol; if (proto == IPPROTO_UDP) { udph = udp_hdr(skb); @@ -329,48 +319,6 @@ int enic_is_valid_vf(struct enic *enic, int vf) #endif } -static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf) -{ - struct enic *enic = vnic_dev_priv(wq->vdev); - - if (buf->sop) - pci_unmap_single(enic->pdev, buf->dma_addr, - buf->len, PCI_DMA_TODEVICE); - else - pci_unmap_page(enic->pdev, buf->dma_addr, - buf->len, PCI_DMA_TODEVICE); - - if (buf->os_buf) - dev_kfree_skb_any(buf->os_buf); -} - -static void enic_wq_free_buf(struct vnic_wq *wq, - struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque) -{ - enic_free_wq_buf(wq, buf); -} - -static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, - u8 type, u16 q_number, u16 completed_index, void *opaque) -{ - struct enic *enic = vnic_dev_priv(vdev); - - spin_lock(&enic->wq_lock[q_number]); - - vnic_wq_service(&enic->wq[q_number], cq_desc, - completed_index, enic_wq_free_buf, - opaque); - - if (netif_tx_queue_stopped(netdev_get_tx_queue(enic->netdev, q_number)) && - vnic_wq_desc_avail(&enic->wq[q_number]) >= - (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)) - netif_wake_subqueue(enic->netdev, q_number); - - spin_unlock(&enic->wq_lock[q_number]); - - return 0; -} - static bool enic_log_q_error(struct enic *enic) { unsigned int i; @@ -378,7 +326,7 @@ static bool enic_log_q_error(struct enic *enic) bool err = false; for (i = 0; i < enic->wq_count; i++) { - error_status = vnic_wq_error_status(&enic->wq[i]); + error_status = vnic_wq_error_status(&enic->wq[i].vwq); err |= error_status; if (error_status) netdev_err(enic->netdev, "WQ[%d] error_status %d\n", @@ -386,7 +334,7 @@ static bool enic_log_q_error(struct enic *enic) } for (i = 0; i < enic->rq_count; i++) { - error_status = vnic_rq_error_status(&enic->rq[i]); + error_status = vnic_rq_error_status(&enic->rq[i].vrq); err |= error_status; if (error_status) netdev_err(enic->netdev, "RQ[%d] error_status %d\n", @@ -429,6 +377,36 @@ static void enic_mtu_check(struct enic *enic) } } +static void enic_set_rx_coal_setting(struct enic *enic) +{ + unsigned int speed; + int index = -1; + struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting; + + /* 1. Read the link speed from fw + * 2. Pick the default range for the speed + * 3. 
Update it in enic->rx_coalesce_setting + */ + speed = vnic_dev_port_speed(enic->vdev); + if (speed > ENIC_LINK_SPEED_10G) + index = ENIC_LINK_40G_INDEX; + else if (speed > ENIC_LINK_SPEED_4G) + index = ENIC_LINK_10G_INDEX; + else + index = ENIC_LINK_4G_INDEX; + + rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start; + rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start; + rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END; + + /* Start with the value provided by UCSM */ + for (index = 0; index < enic->rq_count; index++) + enic->cq[index].cur_rx_coal_timeval = + enic->config.intr_timer_usec; + + rx_coal->use_adaptive_rx_coalesce = 1; +} + static void enic_link_check(struct enic *enic) { int link_status = vnic_dev_link_status(enic->vdev); @@ -437,6 +415,7 @@ static void enic_link_check(struct enic *enic) if (link_status && !carrier_ok) { netdev_info(enic->netdev, "Link UP\n"); netif_carrier_on(enic->netdev); + enic_set_rx_coal_setting(enic); } else if (!link_status && carrier_ok) { netdev_info(enic->netdev, "Link DOWN\n"); netif_carrier_off(enic->netdev); @@ -456,9 +435,9 @@ static irqreturn_t enic_isr_legacy(int irq, void *data) { struct net_device *netdev = data; struct enic *enic = netdev_priv(netdev); - unsigned int io_intr = enic_legacy_io_intr(); - unsigned int err_intr = enic_legacy_err_intr(); - unsigned int notify_intr = enic_legacy_notify_intr(); + unsigned int io_intr = ENIC_LEGACY_IO_INTR; + unsigned int err_intr = ENIC_LEGACY_ERR_INTR; + unsigned int notify_intr = ENIC_LEGACY_NOTIFY_INTR; u32 pba; vnic_intr_mask(&enic->intr[io_intr]); @@ -582,8 +561,8 @@ static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq, dma_addr_t dma_addr; int err = 0; - dma_addr = pci_map_single(enic->pdev, skb->data, head_len, - PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len, + DMA_TO_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) return -ENOMEM; @@ -598,6 +577,11 @@ static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq, if (!eop) err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback); + /* The enic_queue_wq_desc() above does not do HW checksum */ + enic->wq[wq->index].stats.csum_none++; + enic->wq[wq->index].stats.packets++; + enic->wq[wq->index].stats.bytes += skb->len; + return err; } @@ -613,8 +597,8 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq, dma_addr_t dma_addr; int err = 0; - dma_addr = pci_map_single(enic->pdev, skb->data, head_len, - PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len, + DMA_TO_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) return -ENOMEM; @@ -630,17 +614,34 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq, if (!eop) err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback); + enic->wq[wq->index].stats.csum_partial++; + enic->wq[wq->index].stats.packets++; + enic->wq[wq->index].stats.bytes += skb->len; + return err; } static void enic_preload_tcp_csum_encap(struct sk_buff *skb) { - if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb); + + switch (eth->h_proto) { + case ntohs(ETH_P_IP): inner_ip_hdr(skb)->check = 0; inner_tcp_hdr(skb)->check = ~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr, inner_ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + break; + case ntohs(ETH_P_IPV6): + inner_tcp_hdr(skb)->check = + ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr, + &inner_ipv6_hdr(skb)->daddr, 
0, + IPPROTO_TCP, 0); + break; + default: + WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload"); + break; } } @@ -656,8 +657,7 @@ static void enic_preload_tcp_csum(struct sk_buff *skb) tcp_hdr(skb)->check = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) { - tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + tcp_v6_gso_csum_prep(skb); } } @@ -672,16 +672,18 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, unsigned int offset = 0; unsigned int hdr_len; dma_addr_t dma_addr; + unsigned int pkts; unsigned int len; skb_frag_t *frag; if (skb->encapsulation) { - hdr_len = skb_inner_transport_header(skb) - skb->data; - hdr_len += inner_tcp_hdrlen(skb); + hdr_len = skb_inner_tcp_all_headers(skb); enic_preload_tcp_csum_encap(skb); + enic->wq[wq->index].stats.encap_tso++; } else { - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); enic_preload_tcp_csum(skb); + enic->wq[wq->index].stats.tso++; } /* Queue WQ_ENET_MAX_DESC_LEN length descriptors @@ -689,8 +691,9 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, */ while (frag_len_left) { len = min(frag_len_left, (unsigned int)WQ_ENET_MAX_DESC_LEN); - dma_addr = pci_map_single(enic->pdev, skb->data + offset, len, - PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, + skb->data + offset, len, + DMA_TO_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) return -ENOMEM; enic_queue_wq_desc_tso(wq, skb, dma_addr, len, mss, hdr_len, @@ -701,7 +704,7 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, } if (eop) - return 0; + goto tso_out_stats; /* Queue WQ_ENET_MAX_DESC_LEN length descriptors * for additional data fragments @@ -728,6 +731,15 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, } } +tso_out_stats: + /* calculate how many packets tso sent */ + len = skb->len - hdr_len; + pkts = len / mss; + if ((len % mss) > 0) + pkts++; + enic->wq[wq->index].stats.packets += pkts; + enic->wq[wq->index].stats.bytes += (len + (pkts * hdr_len)); + return 0; } @@ -748,8 +760,8 @@ static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq, dma_addr_t dma_addr; int err = 0; - dma_addr = pci_map_single(enic->pdev, skb->data, head_len, - PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len, + DMA_TO_DEVICE); if (unlikely(enic_dma_map_check(enic, dma_addr))) return -ENOMEM; @@ -760,10 +772,14 @@ static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq, if (!eop) err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback); + enic->wq[wq->index].stats.encap_csum++; + enic->wq[wq->index].stats.packets++; + enic->wq[wq->index].stats.bytes += skb->len; + return err; } -static inline void enic_queue_wq_skb(struct enic *enic, +static inline int enic_queue_wq_skb(struct enic *enic, struct vnic_wq *wq, struct sk_buff *skb) { unsigned int mss = skb_shinfo(skb)->gso_size; @@ -776,6 +792,7 @@ static inline void enic_queue_wq_skb(struct enic *enic, /* VLAN tag from trunking driver */ vlan_tag_insert = 1; vlan_tag = skb_vlan_tag_get(skb); + enic->wq[wq->index].stats.add_vlan++; } else if (enic->loop_enable) { vlan_tag = enic->loop_tag; loopback = 1; @@ -788,7 +805,7 @@ static inline void enic_queue_wq_skb(struct enic *enic, else if (skb->encapsulation) err = enic_queue_wq_skb_encap(enic, wq, skb, 
vlan_tag_insert, vlan_tag, loopback); - else if (skb->ip_summed == CHECKSUM_PARTIAL) + else if (skb->ip_summed == CHECKSUM_PARTIAL) err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert, vlan_tag, loopback); else @@ -809,6 +826,7 @@ static inline void enic_queue_wq_skb(struct enic *enic, wq->to_use = buf->next; dev_kfree_skb(skb); } + return err; } /* netif_tx_lock held, process context with BHs disabled, or BH */ @@ -820,13 +838,15 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb, unsigned int txq_map; struct netdev_queue *txq; + txq_map = skb_get_queue_mapping(skb) % enic->wq_count; + wq = &enic->wq[txq_map].vwq; + if (skb->len <= 0) { dev_kfree_skb_any(skb); + enic->wq[wq->index].stats.null_pkt++; return NETDEV_TX_OK; } - txq_map = skb_get_queue_mapping(skb) % enic->wq_count; - wq = &enic->wq[txq_map]; txq = netdev_get_tx_queue(netdev, txq_map); /* Non-TSO sends must fit within ENIC_NON_TSO_MAX_DESC descs, @@ -838,42 +858,52 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb, skb_shinfo(skb)->nr_frags + 1 > ENIC_NON_TSO_MAX_DESC && skb_linearize(skb)) { dev_kfree_skb_any(skb); + enic->wq[wq->index].stats.skb_linear_fail++; return NETDEV_TX_OK; } - spin_lock(&enic->wq_lock[txq_map]); + spin_lock(&enic->wq[txq_map].lock); if (vnic_wq_desc_avail(wq) < skb_shinfo(skb)->nr_frags + ENIC_DESC_MAX_SPLITS) { netif_tx_stop_queue(txq); /* This is a hard error, log it */ netdev_err(netdev, "BUG! Tx ring full when queue awake!\n"); - spin_unlock(&enic->wq_lock[txq_map]); + spin_unlock(&enic->wq[txq_map].lock); + enic->wq[wq->index].stats.desc_full_awake++; return NETDEV_TX_BUSY; } - enic_queue_wq_skb(enic, wq, skb); + if (enic_queue_wq_skb(enic, wq, skb)) + goto error; - if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS) + if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS) { netif_tx_stop_queue(txq); - if (!skb->xmit_more || netif_xmit_stopped(txq)) + enic->wq[wq->index].stats.stopped++; + } + skb_tx_timestamp(skb); + if (!netdev_xmit_more() || netif_xmit_stopped(txq)) vnic_wq_doorbell(wq); - spin_unlock(&enic->wq_lock[txq_map]); +error: + spin_unlock(&enic->wq[txq_map].lock); return NETDEV_TX_OK; } -/* dev_base_lock rwlock held, nominally process context */ +/* rcu_read_lock potentially held, nominally process context */ static void enic_get_stats(struct net_device *netdev, struct rtnl_link_stats64 *net_stats) { struct enic *enic = netdev_priv(netdev); struct vnic_stats *stats; + u64 pkt_truncated = 0; + u64 bad_fcs = 0; int err; + int i; err = enic_dev_stats_dump(enic, &stats); - /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump + /* return only when dma_alloc_coherent fails in vnic_dev_stats_dump * For other failures, like devcmd failure, we return previously * recorded stats. 
*/ @@ -889,8 +919,17 @@ static void enic_get_stats(struct net_device *netdev, net_stats->rx_bytes = stats->rx.rx_bytes_ok; net_stats->rx_errors = stats->rx.rx_errors; net_stats->multicast = stats->rx.rx_multicast_frames_ok; - net_stats->rx_over_errors = enic->rq_truncated_pkts; - net_stats->rx_crc_errors = enic->rq_bad_fcs; + + for (i = 0; i < enic->rq_count; i++) { + struct enic_rq_stats *rqs = &enic->rq[i].stats; + + if (!enic->rq[i].vrq.ctrl) + break; + pkt_truncated += rqs->pkt_truncated; + bad_fcs += rqs->bad_fcs; + } + net_stats->rx_over_errors = pkt_truncated; + net_stats->rx_crc_errors = bad_fcs; net_stats->rx_dropped = stats->rx.rx_no_bufs + stats->rx.rx_drop; } @@ -976,7 +1015,7 @@ static int enic_set_mac_addr(struct net_device *netdev, char *addr) return -EADDRNOTAVAIL; } - memcpy(netdev->dev_addr, addr, netdev->addr_len); + eth_hw_addr_set(netdev, addr); return 0; } @@ -1054,7 +1093,7 @@ static void enic_set_rx_mode(struct net_device *netdev) } /* netif_tx_lock held, BHs disabled */ -static void enic_tx_timeout(struct net_device *netdev) +static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct enic *enic = netdev_priv(netdev); schedule_work(&enic->tx_hang_reset); @@ -1089,6 +1128,7 @@ static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) static int enic_set_vf_port(struct net_device *netdev, int vf, struct nlattr *port[]) { + static const u8 zero_addr[ETH_ALEN] = {}; struct enic *enic = netdev_priv(netdev); struct enic_port_profile prev_pp; struct enic_port_profile *pp; @@ -1108,18 +1148,30 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]); if (port[IFLA_PORT_PROFILE]) { + if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) { + memcpy(pp, &prev_pp, sizeof(*pp)); + return -EINVAL; + } pp->set |= ENIC_SET_NAME; memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]), PORT_PROFILE_MAX); } if (port[IFLA_PORT_INSTANCE_UUID]) { + if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) { + memcpy(pp, &prev_pp, sizeof(*pp)); + return -EINVAL; + } pp->set |= ENIC_SET_INSTANCE; memcpy(pp->instance_uuid, nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX); } if (port[IFLA_PORT_HOST_UUID]) { + if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) { + memcpy(pp, &prev_pp, sizeof(*pp)); + return -EINVAL; + } pp->set |= ENIC_SET_HOST; memcpy(pp->host_uuid, nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX); @@ -1153,7 +1205,7 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, } else { memset(pp, 0, sizeof(*pp)); if (vf == PORT_SELF_VF) - eth_zero_addr(netdev->dev_addr); + eth_hw_addr_set(netdev, zero_addr); } } else { /* Set flag to indicate that the port assoc/disassoc @@ -1165,7 +1217,7 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, if (pp->request == PORT_REQUEST_DISASSOCIATE) { eth_zero_addr(pp->mac_addr); if (vf == PORT_SELF_VF) - eth_zero_addr(netdev->dev_addr); + eth_hw_addr_set(netdev, zero_addr); } } @@ -1210,229 +1262,6 @@ nla_put_failure: return -EMSGSIZE; } -static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - - if (!buf->os_buf) - return; - - pci_unmap_single(enic->pdev, buf->dma_addr, - buf->len, PCI_DMA_FROMDEVICE); - dev_kfree_skb_any(buf->os_buf); - buf->os_buf = NULL; -} - -static int enic_rq_alloc_buf(struct vnic_rq *rq) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - struct net_device *netdev = enic->netdev; - struct sk_buff *skb; - 
unsigned int len = netdev->mtu + VLAN_ETH_HLEN; - unsigned int os_buf_index = 0; - dma_addr_t dma_addr; - struct vnic_rq_buf *buf = rq->to_use; - - if (buf->os_buf) { - enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr, - buf->len); - - return 0; - } - skb = netdev_alloc_skb_ip_align(netdev, len); - if (!skb) - return -ENOMEM; - - dma_addr = pci_map_single(enic->pdev, skb->data, len, - PCI_DMA_FROMDEVICE); - if (unlikely(enic_dma_map_check(enic, dma_addr))) { - dev_kfree_skb(skb); - return -ENOMEM; - } - - enic_queue_rq_desc(rq, skb, os_buf_index, - dma_addr, len); - - return 0; -} - -static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size, - u32 pkt_len) -{ - if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len) - pkt_size->large_pkt_bytes_cnt += pkt_len; - else - pkt_size->small_pkt_bytes_cnt += pkt_len; -} - -static bool enic_rxcopybreak(struct net_device *netdev, struct sk_buff **skb, - struct vnic_rq_buf *buf, u16 len) -{ - struct enic *enic = netdev_priv(netdev); - struct sk_buff *new_skb; - - if (len > enic->rx_copybreak) - return false; - new_skb = netdev_alloc_skb_ip_align(netdev, len); - if (!new_skb) - return false; - pci_dma_sync_single_for_cpu(enic->pdev, buf->dma_addr, len, - DMA_FROM_DEVICE); - memcpy(new_skb->data, (*skb)->data, len); - *skb = new_skb; - - return true; -} - -static void enic_rq_indicate_buf(struct vnic_rq *rq, - struct cq_desc *cq_desc, struct vnic_rq_buf *buf, - int skipped, void *opaque) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - struct net_device *netdev = enic->netdev; - struct sk_buff *skb; - struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)]; - - u8 type, color, eop, sop, ingress_port, vlan_stripped; - u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof; - u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok; - u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc; - u8 packet_error; - u16 q_number, completed_index, bytes_written, vlan_tci, checksum; - u32 rss_hash; - bool outer_csum_ok = true, encap = false; - - if (skipped) - return; - - skb = buf->os_buf; - - cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, - &type, &color, &q_number, &completed_index, - &ingress_port, &fcoe, &eop, &sop, &rss_type, - &csum_not_calc, &rss_hash, &bytes_written, - &packet_error, &vlan_stripped, &vlan_tci, &checksum, - &fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error, - &fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp, - &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment, - &fcs_ok); - - if (packet_error) { - - if (!fcs_ok) { - if (bytes_written > 0) - enic->rq_bad_fcs++; - else if (bytes_written == 0) - enic->rq_truncated_pkts++; - } - - pci_unmap_single(enic->pdev, buf->dma_addr, buf->len, - PCI_DMA_FROMDEVICE); - dev_kfree_skb_any(skb); - buf->os_buf = NULL; - - return; - } - - if (eop && bytes_written > 0) { - - /* Good receive - */ - - if (!enic_rxcopybreak(netdev, &skb, buf, bytes_written)) { - buf->os_buf = NULL; - pci_unmap_single(enic->pdev, buf->dma_addr, buf->len, - PCI_DMA_FROMDEVICE); - } - prefetch(skb->data - NET_IP_ALIGN); - - skb_put(skb, bytes_written); - skb->protocol = eth_type_trans(skb, netdev); - skb_record_rx_queue(skb, q_number); - if ((netdev->features & NETIF_F_RXHASH) && rss_hash && - (type == 3)) { - switch (rss_type) { - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4: - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX: - skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4); - break; - case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4: - case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6: - case 
CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX: - skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3); - break; - } - } - if (enic->vxlan.vxlan_udp_port_number) { - switch (enic->vxlan.patch_level) { - case 0: - if (fcoe) { - encap = true; - outer_csum_ok = fcoe_fc_crc_ok; - } - break; - case 2: - if ((type == 7) && - (rss_hash & BIT(0))) { - encap = true; - outer_csum_ok = (rss_hash & BIT(1)) && - (rss_hash & BIT(2)); - } - break; - } - } - - /* Hardware does not provide whole packet checksum. It only - * provides pseudo checksum. Since hw validates the packet - * checksum but not provide us the checksum value. use - * CHECSUM_UNNECESSARY. - * - * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is - * inner csum_ok. outer_csum_ok is set by hw when outer udp - * csum is correct or is zero. - */ - if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && - tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = encap; - } - - if (vlan_stripped) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); - - skb_mark_napi_id(skb, &enic->napi[rq->index]); - if (!(netdev->features & NETIF_F_GRO)) - netif_receive_skb(skb); - else - napi_gro_receive(&enic->napi[q_number], skb); - if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) - enic_intr_update_pkt_size(&cq->pkt_size_counter, - bytes_written); - } else { - - /* Buffer overflow - */ - - pci_unmap_single(enic->pdev, buf->dma_addr, buf->len, - PCI_DMA_FROMDEVICE); - dev_kfree_skb_any(skb); - buf->os_buf = NULL; - } -} - -static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, - u8 type, u16 q_number, u16 completed_index, void *opaque) -{ - struct enic *enic = vnic_dev_priv(vdev); - - vnic_rq_service(&enic->rq[q_number], cq_desc, - completed_index, VNIC_RQ_RETURN_DESC, - enic_rq_indicate_buf, opaque); - - return 0; -} - static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq) { unsigned int intr = enic_msix_rq_intr(enic, rq->index); @@ -1497,18 +1326,16 @@ static int enic_poll(struct napi_struct *napi, int budget) struct enic *enic = netdev_priv(netdev); unsigned int cq_rq = enic_cq_rq(enic, 0); unsigned int cq_wq = enic_cq_wq(enic, 0); - unsigned int intr = enic_legacy_io_intr(); + unsigned int intr = ENIC_LEGACY_IO_INTR; unsigned int rq_work_to_do = budget; - unsigned int wq_work_to_do = -1; /* no limit */ + unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET; unsigned int work_done, rq_work_done = 0, wq_work_done; int err; - wq_work_done = vnic_cq_service(&enic->cq[cq_wq], wq_work_to_do, - enic_wq_service, NULL); + wq_work_done = enic_wq_cq_service(enic, cq_wq, wq_work_to_do); if (budget > 0) - rq_work_done = vnic_cq_service(&enic->cq[cq_rq], - rq_work_to_do, enic_rq_service, NULL); + rq_work_done = enic_rq_cq_service(enic, cq_rq, rq_work_to_do); /* Accumulate intr event credits for this polling * cycle. An intr event is the completion of a @@ -1523,7 +1350,7 @@ static int enic_poll(struct napi_struct *napi, int budget) 0 /* don't unmask intr */, 0 /* don't reset intr timer */); - err = vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf); + err = vnic_rq_fill(&enic->rq[0].vrq, enic_rq_alloc_buf); /* Buffer allocation failed. Stay in polling * mode so we can try to fill the ring again. @@ -1535,7 +1362,7 @@ static int enic_poll(struct napi_struct *napi, int budget) /* Call the function which refreshes the intr coalescing timer * value based on the traffic. 
*/ - enic_calc_int_moderation(enic, &enic->rq[0]); + enic_calc_int_moderation(enic, &enic->rq[0].vrq); if ((rq_work_done < budget) && napi_complete_done(napi, rq_work_done)) { @@ -1544,8 +1371,11 @@ static int enic_poll(struct napi_struct *napi, int budget) */ if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) - enic_set_int_moderation(enic, &enic->rq[0]); + enic_set_int_moderation(enic, &enic->rq[0].vrq); vnic_intr_unmask(&enic->intr[intr]); + enic->rq[0].stats.napi_complete++; + } else { + enic->rq[0].stats.napi_repoll++; } return rq_work_done; @@ -1594,18 +1424,18 @@ static int enic_poll_msix_wq(struct napi_struct *napi, int budget) struct net_device *netdev = napi->dev; struct enic *enic = netdev_priv(netdev); unsigned int wq_index = (napi - &enic->napi[0]) - enic->rq_count; - struct vnic_wq *wq = &enic->wq[wq_index]; + struct vnic_wq *wq = &enic->wq[wq_index].vwq; unsigned int cq; unsigned int intr; - unsigned int wq_work_to_do = -1; /* clean all desc possible */ + unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET; unsigned int wq_work_done; unsigned int wq_irq; wq_irq = wq->index; cq = enic_cq_wq(enic, wq_irq); intr = enic_msix_wq_intr(enic, wq_irq); - wq_work_done = vnic_cq_service(&enic->cq[cq], wq_work_to_do, - enic_wq_service, NULL); + + wq_work_done = enic_wq_cq_service(enic, cq, wq_work_to_do); vnic_intr_return_credits(&enic->intr[intr], wq_work_done, 0 /* don't unmask intr */, @@ -1634,8 +1464,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) */ if (budget > 0) - work_done = vnic_cq_service(&enic->cq[cq], - work_to_do, enic_rq_service, NULL); + work_done = enic_rq_cq_service(enic, cq, work_to_do); /* Return intr event credits for this polling * cycle. An intr event is the completion of a @@ -1648,7 +1477,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) 0 /* don't unmask intr */, 0 /* don't reset intr timer */); - err = vnic_rq_fill(&enic->rq[rq], enic_rq_alloc_buf); + err = vnic_rq_fill(&enic->rq[rq].vrq, enic_rq_alloc_buf); /* Buffer allocation failed. Stay in polling mode * so we can try to fill the ring again. @@ -1660,7 +1489,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) /* Call the function which refreshes the intr coalescing timer * value based on the traffic. 
*/ - enic_calc_int_moderation(enic, &enic->rq[rq]); + enic_calc_int_moderation(enic, &enic->rq[rq].vrq); if ((work_done < budget) && napi_complete_done(napi, work_done)) { @@ -1669,16 +1498,19 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) */ if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) - enic_set_int_moderation(enic, &enic->rq[rq]); + enic_set_int_moderation(enic, &enic->rq[rq].vrq); vnic_intr_unmask(&enic->intr[intr]); + enic->rq[rq].stats.napi_complete++; + } else { + enic->rq[rq].stats.napi_repoll++; } return work_done; } -static void enic_notify_timer(unsigned long data) +static void enic_notify_timer(struct timer_list *t) { - struct enic *enic = (struct enic *)data; + struct enic *enic = timer_container_of(enic, t, notify_timer); enic_notify_check(enic); @@ -1700,7 +1532,7 @@ static void enic_free_intr(struct enic *enic) free_irq(enic->pdev->irq, enic); break; case VNIC_DEV_INTR_MODE_MSIX: - for (i = 0; i < ARRAY_SIZE(enic->msix); i++) + for (i = 0; i < enic->intr_count; i++) if (enic->msix[i].requested) free_irq(enic->msix_entry[i].vector, enic->msix[i].devid); @@ -1767,7 +1599,7 @@ static int enic_request_intr(struct enic *enic) enic->msix[intr].isr = enic_isr_msix_notify; enic->msix[intr].devid = enic; - for (i = 0; i < ARRAY_SIZE(enic->msix); i++) + for (i = 0; i < enic->intr_count; i++) enic->msix[i].requested = 0; for (i = 0; i < enic->intr_count; i++) { @@ -1809,36 +1641,6 @@ static void enic_synchronize_irqs(struct enic *enic) } } -static void enic_set_rx_coal_setting(struct enic *enic) -{ - unsigned int speed; - int index = -1; - struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting; - - /* 1. Read the link speed from fw - * 2. Pick the default range for the speed - * 3. Update it in enic->rx_coalesce_setting - */ - speed = vnic_dev_port_speed(enic->vdev); - if (ENIC_LINK_SPEED_10G < speed) - index = ENIC_LINK_40G_INDEX; - else if (ENIC_LINK_SPEED_4G < speed) - index = ENIC_LINK_10G_INDEX; - else - index = ENIC_LINK_4G_INDEX; - - rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start; - rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start; - rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END; - - /* Start with the value provided by UCSM */ - for (index = 0; index < enic->rq_count; index++) - enic->cq[index].cur_rx_coal_timeval = - enic->config.intr_timer_usec; - - rx_coal->use_adaptive_rx_coalesce = 1; -} - static int enic_dev_notify_set(struct enic *enic) { int err; @@ -1846,8 +1648,7 @@ static int enic_dev_notify_set(struct enic *enic) spin_lock_bh(&enic->devcmd_lock); switch (vnic_dev_get_intr_mode(enic->vdev)) { case VNIC_DEV_INTR_MODE_INTX: - err = vnic_dev_notify_set(enic->vdev, - enic_legacy_notify_intr()); + err = vnic_dev_notify_set(enic->vdev, ENIC_LEGACY_NOTIFY_INTR); break; case VNIC_DEV_INTR_MODE_MSIX: err = vnic_dev_notify_set(enic->vdev, @@ -1879,7 +1680,18 @@ static int enic_open(struct net_device *netdev) { struct enic *enic = netdev_priv(netdev); unsigned int i; - int err; + int err, ret; + unsigned int max_pkt_len = netdev->mtu + VLAN_ETH_HLEN; + struct page_pool_params pp_params = { + .order = get_order(max_pkt_len), + .pool_size = enic->config.rq_desc_count, + .nid = dev_to_node(&enic->pdev->dev), + .dev = &enic->pdev->dev, + .dma_dir = DMA_FROM_DEVICE, + .max_len = (max_pkt_len > PAGE_SIZE) ? 
max_pkt_len : PAGE_SIZE, + .netdev = netdev, + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + }; err = enic_request_intr(enic); if (err) { @@ -1897,9 +1709,21 @@ static int enic_open(struct net_device *netdev) } for (i = 0; i < enic->rq_count; i++) { - vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf); + /* create a page pool for each RQ */ + pp_params.napi = &enic->napi[i]; + pp_params.queue_idx = i; + enic->rq[i].pool = page_pool_create(&pp_params); + if (IS_ERR(enic->rq[i].pool)) { + err = PTR_ERR(enic->rq[i].pool); + enic->rq[i].pool = NULL; + goto err_out_free_rq; + } + + /* enable rq before updating rq desc */ + vnic_rq_enable(&enic->rq[i].vrq); + vnic_rq_fill(&enic->rq[i].vrq, enic_rq_alloc_buf); /* Need at least one buffer on ring to get going */ - if (vnic_rq_desc_used(&enic->rq[i]) == 0) { + if (vnic_rq_desc_used(&enic->rq[i].vrq) == 0) { netdev_err(netdev, "Unable to alloc receive buffers\n"); err = -ENOMEM; goto err_out_free_rq; @@ -1907,9 +1731,7 @@ static int enic_open(struct net_device *netdev) } for (i = 0; i < enic->wq_count; i++) - vnic_wq_enable(&enic->wq[i]); - for (i = 0; i < enic->rq_count; i++) - vnic_rq_enable(&enic->rq[i]); + vnic_wq_enable(&enic->wq[i].vwq); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_add_station_addr(enic); @@ -1930,13 +1752,19 @@ static int enic_open(struct net_device *netdev) vnic_intr_unmask(&enic->intr[i]); enic_notify_timer_start(enic); - enic_rfs_flw_tbl_init(enic); + enic_rfs_timer_start(enic); return 0; err_out_free_rq: - for (i = 0; i < enic->rq_count; i++) - vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); + for (i = 0; i < enic->rq_count; i++) { + ret = vnic_rq_disable(&enic->rq[i].vrq); + if (!ret) { + vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf); + page_pool_destroy(enic->rq[i].pool); + enic->rq[i].pool = NULL; + } + } enic_dev_notify_unset(enic); err_out_free_intr: enic_unset_affinity_hint(enic); @@ -1959,7 +1787,7 @@ static int enic_stop(struct net_device *netdev) enic_synchronize_irqs(enic); - del_timer_sync(&enic->notify_timer); + timer_delete_sync(&enic->notify_timer); enic_rfs_flw_tbl_free(enic); enic_dev_disable(enic); @@ -1968,21 +1796,21 @@ static int enic_stop(struct net_device *netdev) napi_disable(&enic->napi[i]); netif_carrier_off(netdev); - netif_tx_disable(netdev); if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) napi_disable(&enic->napi[enic_cq_wq(enic, i)]); + netif_tx_disable(netdev); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); for (i = 0; i < enic->wq_count; i++) { - err = vnic_wq_disable(&enic->wq[i]); + err = vnic_wq_disable(&enic->wq[i].vwq); if (err) return err; } for (i = 0; i < enic->rq_count; i++) { - err = vnic_rq_disable(&enic->rq[i]); + err = vnic_rq_disable(&enic->rq[i].vrq); if (err) return err; } @@ -1992,9 +1820,12 @@ static int enic_stop(struct net_device *netdev) enic_free_intr(enic); for (i = 0; i < enic->wq_count; i++) - vnic_wq_clean(&enic->wq[i], enic_free_wq_buf); - for (i = 0; i < enic->rq_count; i++) - vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); + vnic_wq_clean(&enic->wq[i].vwq, enic_free_wq_buf); + for (i = 0; i < enic->rq_count; i++) { + vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf); + page_pool_destroy(enic->rq[i].pool); + enic->rq[i].pool = NULL; + } for (i = 0; i < enic->cq_count; i++) vnic_cq_clean(&enic->cq[i]); for (i = 0; i < enic->intr_count; i++) @@ -2003,28 +1834,42 @@ static int enic_stop(struct net_device *netdev) return 0; } +static int 
_enic_change_mtu(struct net_device *netdev, int new_mtu) +{ + bool running = netif_running(netdev); + int err = 0; + + ASSERT_RTNL(); + if (running) { + err = enic_stop(netdev); + if (err) + return err; + } + + WRITE_ONCE(netdev->mtu, new_mtu); + + if (running) { + err = enic_open(netdev); + if (err) + return err; + } + + return 0; +} + static int enic_change_mtu(struct net_device *netdev, int new_mtu) { struct enic *enic = netdev_priv(netdev); - int running = netif_running(netdev); if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; - if (running) - enic_stop(netdev); - - netdev->mtu = new_mtu; - - if (netdev->mtu > enic->port_mtu) + if (new_mtu > enic->port_mtu) netdev_warn(netdev, - "interface MTU (%d) set higher than port MTU (%d)\n", - netdev->mtu, enic->port_mtu); - - if (running) - enic_open(netdev); + "interface MTU (%d) set higher than port MTU (%d)\n", + new_mtu, enic->port_mtu); - return 0; + return _enic_change_mtu(netdev, new_mtu); } static void enic_change_mtu_work(struct work_struct *work) @@ -2032,47 +1877,9 @@ static void enic_change_mtu_work(struct work_struct *work) struct enic *enic = container_of(work, struct enic, change_mtu_work); struct net_device *netdev = enic->netdev; int new_mtu = vnic_dev_mtu(enic->vdev); - int err; - unsigned int i; - - new_mtu = max_t(int, ENIC_MIN_MTU, min_t(int, ENIC_MAX_MTU, new_mtu)); rtnl_lock(); - - /* Stop RQ */ - del_timer_sync(&enic->notify_timer); - - for (i = 0; i < enic->rq_count; i++) - napi_disable(&enic->napi[i]); - - vnic_intr_mask(&enic->intr[0]); - enic_synchronize_irqs(enic); - err = vnic_rq_disable(&enic->rq[0]); - if (err) { - rtnl_unlock(); - netdev_err(netdev, "Unable to disable RQ.\n"); - return; - } - vnic_rq_clean(&enic->rq[0], enic_free_rq_buf); - vnic_cq_clean(&enic->cq[0]); - vnic_intr_clean(&enic->intr[0]); - - /* Fill RQ with new_mtu-sized buffers */ - netdev->mtu = new_mtu; - vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf); - /* Need at least one buffer on ring to get going */ - if (vnic_rq_desc_used(&enic->rq[0]) == 0) { - rtnl_unlock(); - netdev_err(netdev, "Unable to alloc receive buffers.\n"); - return; - } - - /* Start RQ */ - vnic_rq_enable(&enic->rq[0]); - napi_enable(&enic->napi[0]); - vnic_intr_unmask(&enic->intr[0]); - enic_notify_timer_start(enic); - + (void)_enic_change_mtu(netdev, new_mtu); rtnl_unlock(); netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu); @@ -2121,8 +1928,6 @@ static int enic_dev_wait(struct vnic_dev *vdev, int done; int err; - BUG_ON(in_interrupt()); - err = start(vdev, arg); if (err) return err; @@ -2150,9 +1955,10 @@ static int enic_dev_wait(struct vnic_dev *vdev, static int enic_dev_open(struct enic *enic) { int err; + u32 flags = CMD_OPENF_IG_DESCCACHE; err = enic_dev_wait(enic->vdev, vnic_dev_open, - vnic_dev_open_done, 0); + vnic_dev_open_done, flags); if (err) dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n", err); @@ -2192,9 +1998,9 @@ int __enic_set_rsskey(struct enic *enic) dma_addr_t rss_key_buf_pa; int i, kidx, bidx, err; - rss_key_buf_va = pci_zalloc_consistent(enic->pdev, - sizeof(union vnic_rss_key), - &rss_key_buf_pa); + rss_key_buf_va = dma_alloc_coherent(&enic->pdev->dev, + sizeof(union vnic_rss_key), + &rss_key_buf_pa, GFP_ATOMIC); if (!rss_key_buf_va) return -ENOMEM; @@ -2209,8 +2015,8 @@ int __enic_set_rsskey(struct enic *enic) sizeof(union vnic_rss_key)); spin_unlock_bh(&enic->devcmd_lock); - pci_free_consistent(enic->pdev, sizeof(union vnic_rss_key), - rss_key_buf_va, rss_key_buf_pa); + 
dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_key), + rss_key_buf_va, rss_key_buf_pa); return err; } @@ -2229,8 +2035,9 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) unsigned int i; int err; - rss_cpu_buf_va = pci_alloc_consistent(enic->pdev, - sizeof(union vnic_rss_cpu), &rss_cpu_buf_pa); + rss_cpu_buf_va = dma_alloc_coherent(&enic->pdev->dev, + sizeof(union vnic_rss_cpu), + &rss_cpu_buf_pa, GFP_ATOMIC); if (!rss_cpu_buf_va) return -ENOMEM; @@ -2243,8 +2050,8 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) sizeof(union vnic_rss_cpu)); spin_unlock_bh(&enic->devcmd_lock); - pci_free_consistent(enic->pdev, sizeof(union vnic_rss_cpu), - rss_cpu_buf_va, rss_cpu_buf_pa); + dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_cpu), + rss_cpu_buf_va, rss_cpu_buf_pa); return err; } @@ -2274,14 +2081,24 @@ static int enic_set_rss_nic_cfg(struct enic *enic) { struct device *dev = enic_get_dev(enic); const u8 rss_default_cpu = 0; - const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 | - NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 | - NIC_CFG_RSS_HASH_TYPE_IPV6 | - NIC_CFG_RSS_HASH_TYPE_TCP_IPV6; const u8 rss_hash_bits = 7; const u8 rss_base_cpu = 0; + u8 rss_hash_type; + int res; u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1); + spin_lock_bh(&enic->devcmd_lock); + res = vnic_dev_capable_rss_hash_type(enic->vdev, &rss_hash_type); + spin_unlock_bh(&enic->devcmd_lock); + if (res) { + /* defaults for old adapters + */ + rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 | + NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 | + NIC_CFG_RSS_HASH_TYPE_IPV6 | + NIC_CFG_RSS_HASH_TYPE_TCP_IPV6; + } + if (rss_enable) { if (!enic_set_rsskey(enic)) { if (enic_set_rsscpu(enic, rss_hash_bits)) { @@ -2299,6 +2116,13 @@ static int enic_set_rss_nic_cfg(struct enic *enic) rss_hash_bits, rss_base_cpu, rss_enable); } +static void enic_set_api_busy(struct enic *enic, bool busy) +{ + spin_lock(&enic->enic_api_lock); + enic->enic_api_busy = busy; + spin_unlock(&enic->enic_api_lock); +} + static void enic_reset(struct work_struct *work) { struct enic *enic = container_of(work, struct enic, reset); @@ -2308,15 +2132,21 @@ static void enic_reset(struct work_struct *work) rtnl_lock(); - spin_lock(&enic->enic_api_lock); + /* Stop any activity from infiniband */ + enic_set_api_busy(enic, true); + enic_stop(enic->netdev); enic_dev_soft_reset(enic); enic_reset_addr_lists(enic); enic_init_vnic_resources(enic); enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); + enic_ext_cq(enic); enic_open(enic->netdev); - spin_unlock(&enic->enic_api_lock); + + /* Allow infiniband to fiddle with the device again */ + enic_set_api_busy(enic, false); + call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); rtnl_unlock(); @@ -2328,7 +2158,9 @@ static void enic_tx_hang_reset(struct work_struct *work) rtnl_lock(); - spin_lock(&enic->enic_api_lock); + /* Stop any activity from infiniband */ + enic_set_api_busy(enic, true); + enic_dev_hang_notify(enic); enic_stop(enic->netdev); enic_dev_hang_reset(enic); @@ -2336,8 +2168,12 @@ static void enic_tx_hang_reset(struct work_struct *work) enic_init_vnic_resources(enic); enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); + enic_ext_cq(enic); enic_open(enic->netdev); - spin_unlock(&enic->enic_api_lock); + + /* Allow infiniband to fiddle with the device again */ + enic_set_api_busy(enic, false); + call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); rtnl_unlock(); @@ -2345,112 +2181,56 @@ static void enic_tx_hang_reset(struct work_struct 
*work) static int enic_set_intr_mode(struct enic *enic) { - unsigned int n = min_t(unsigned int, enic->rq_count, ENIC_RQ_MAX); - unsigned int m = min_t(unsigned int, enic->wq_count, ENIC_WQ_MAX); unsigned int i; + int num_intr; /* Set interrupt mode (INTx, MSI, MSI-X) depending * on system capabilities. * * Try MSI-X first - * - * We need n RQs, m WQs, n+m CQs, and n+m+2 INTRs - * (the second to last INTR is used for WQ/RQ errors) - * (the last INTR is used for notifications) */ - BUG_ON(ARRAY_SIZE(enic->msix_entry) < n + m + 2); - for (i = 0; i < n + m + 2; i++) - enic->msix_entry[i].entry = i; - - /* Use multiple RQs if RSS is enabled - */ - - if (ENIC_SETTING(enic, RSS) && - enic->config.intr_mode < 1 && - enic->rq_count >= n && - enic->wq_count >= m && - enic->cq_count >= n + m && - enic->intr_count >= n + m + 2) { - - if (pci_enable_msix_range(enic->pdev, enic->msix_entry, - n + m + 2, n + m + 2) > 0) { - - enic->rq_count = n; - enic->wq_count = m; - enic->cq_count = n + m; - enic->intr_count = n + m + 2; - - vnic_dev_set_intr_mode(enic->vdev, - VNIC_DEV_INTR_MODE_MSIX); - - return 0; - } - } - if (enic->config.intr_mode < 1 && - enic->rq_count >= 1 && - enic->wq_count >= m && - enic->cq_count >= 1 + m && - enic->intr_count >= 1 + m + 2) { - if (pci_enable_msix_range(enic->pdev, enic->msix_entry, - 1 + m + 2, 1 + m + 2) > 0) { - - enic->rq_count = 1; - enic->wq_count = m; - enic->cq_count = 1 + m; - enic->intr_count = 1 + m + 2; - + enic->intr_avail >= ENIC_MSIX_MIN_INTR) { + for (i = 0; i < enic->intr_avail; i++) + enic->msix_entry[i].entry = i; + + num_intr = pci_enable_msix_range(enic->pdev, enic->msix_entry, + ENIC_MSIX_MIN_INTR, + enic->intr_avail); + if (num_intr > 0) { vnic_dev_set_intr_mode(enic->vdev, - VNIC_DEV_INTR_MODE_MSIX); - + VNIC_DEV_INTR_MODE_MSIX); + enic->intr_avail = num_intr; return 0; } } /* Next try MSI * - * We need 1 RQ, 1 WQ, 2 CQs, and 1 INTR + * We need 1 INTR */ if (enic->config.intr_mode < 2 && - enic->rq_count >= 1 && - enic->wq_count >= 1 && - enic->cq_count >= 2 && - enic->intr_count >= 1 && + enic->intr_avail >= 1 && !pci_enable_msi(enic->pdev)) { - - enic->rq_count = 1; - enic->wq_count = 1; - enic->cq_count = 2; - enic->intr_count = 1; - + enic->intr_avail = 1; vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSI); - return 0; } /* Next try INTx * - * We need 1 RQ, 1 WQ, 2 CQs, and 3 INTRs + * We need 3 INTRs * (the first INTR is used for WQ/RQ) * (the second INTR is used for WQ/RQ errors) * (the last INTR is used for notifications) */ if (enic->config.intr_mode < 3 && - enic->rq_count >= 1 && - enic->wq_count >= 1 && - enic->cq_count >= 2 && - enic->intr_count >= 3) { - - enic->rq_count = 1; - enic->wq_count = 1; - enic->cq_count = 2; - enic->intr_count = 3; - + enic->intr_avail >= 3) { + enic->intr_avail = 3; vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_INTX); - return 0; } @@ -2475,6 +2255,127 @@ static void enic_clear_intr_mode(struct enic *enic) vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); } +static int enic_adjust_resources(struct enic *enic) +{ + unsigned int max_queues; + unsigned int rq_default; + unsigned int rq_avail; + unsigned int wq_avail; + + if (enic->rq_avail < 1 || enic->wq_avail < 1 || enic->cq_avail < 2) { + dev_err(enic_get_dev(enic), + "Not enough resources available rq: %d wq: %d cq: %d\n", + enic->rq_avail, enic->wq_avail, + enic->cq_avail); + return -ENOSPC; + } + + if (is_kdump_kernel()) { + dev_info(enic_get_dev(enic), "Running from within kdump kernel. 
Using minimal resources\n"); + enic->rq_avail = 1; + enic->wq_avail = 1; + enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS; + enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS; + enic->config.mtu = min_t(u16, 1500, enic->config.mtu); + } + + /* if RSS isn't set, then we can only use one RQ */ + if (!ENIC_SETTING(enic, RSS)) + enic->rq_avail = 1; + + switch (vnic_dev_get_intr_mode(enic->vdev)) { + case VNIC_DEV_INTR_MODE_INTX: + case VNIC_DEV_INTR_MODE_MSI: + enic->rq_count = 1; + enic->wq_count = 1; + enic->cq_count = 2; + enic->intr_count = enic->intr_avail; + break; + case VNIC_DEV_INTR_MODE_MSIX: + /* Adjust the number of wqs/rqs/cqs/interrupts that will be + * used based on which resource is the most constrained + */ + wq_avail = min(enic->wq_avail, ENIC_WQ_MAX); + rq_default = max(netif_get_num_default_rss_queues(), + ENIC_RQ_MIN_DEFAULT); + rq_avail = min3(enic->rq_avail, ENIC_RQ_MAX, rq_default); + max_queues = min(enic->cq_avail, + enic->intr_avail - ENIC_MSIX_RESERVED_INTR); + if (wq_avail + rq_avail <= max_queues) { + enic->rq_count = rq_avail; + enic->wq_count = wq_avail; + } else { + /* recalculate wq/rq count */ + if (rq_avail < wq_avail) { + enic->rq_count = min(rq_avail, max_queues / 2); + enic->wq_count = max_queues - enic->rq_count; + } else { + enic->wq_count = min(wq_avail, max_queues / 2); + enic->rq_count = max_queues - enic->wq_count; + } + } + enic->cq_count = enic->rq_count + enic->wq_count; + enic->intr_count = enic->cq_count + ENIC_MSIX_RESERVED_INTR; + + break; + default: + dev_err(enic_get_dev(enic), "Unknown interrupt mode\n"); + return -EINVAL; + } + + return 0; +} + +static void enic_get_queue_stats_rx(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rxs) +{ + struct enic *enic = netdev_priv(dev); + struct enic_rq_stats *rqstats = &enic->rq[idx].stats; + + rxs->bytes = rqstats->bytes; + rxs->packets = rqstats->packets; + rxs->hw_drops = rqstats->bad_fcs + rqstats->pkt_truncated; + rxs->hw_drop_overruns = rqstats->pkt_truncated; + rxs->csum_unnecessary = rqstats->csum_unnecessary + + rqstats->csum_unnecessary_encap; + rxs->alloc_fail = rqstats->pp_alloc_fail; +} + +static void enic_get_queue_stats_tx(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *txs) +{ + struct enic *enic = netdev_priv(dev); + struct enic_wq_stats *wqstats = &enic->wq[idx].stats; + + txs->bytes = wqstats->bytes; + txs->packets = wqstats->packets; + txs->csum_none = wqstats->csum_none; + txs->needs_csum = wqstats->csum_partial + wqstats->encap_csum + + wqstats->tso; + txs->hw_gso_packets = wqstats->tso; + txs->stop = wqstats->stopped; + txs->wake = wqstats->wake; +} + +static void enic_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rxs, + struct netdev_queue_stats_tx *txs) +{ + rxs->bytes = 0; + rxs->packets = 0; + rxs->hw_drops = 0; + rxs->hw_drop_overruns = 0; + rxs->csum_unnecessary = 0; + rxs->alloc_fail = 0; + txs->bytes = 0; + txs->packets = 0; + txs->csum_none = 0; + txs->needs_csum = 0; + txs->hw_gso_packets = 0; + txs->stop = 0; + txs->wake = 0; +} + static const struct net_device_ops enic_netdev_dynamic_ops = { .ndo_open = enic_open, .ndo_stop = enic_stop, @@ -2496,8 +2397,6 @@ static const struct net_device_ops enic_netdev_dynamic_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = enic_rx_flow_steer, #endif - .ndo_udp_tunnel_add = enic_udp_tunnel_add, - .ndo_udp_tunnel_del = enic_udp_tunnel_del, .ndo_features_check = enic_features_check, }; @@ -2522,38 +2421,98 @@ static const struct net_device_ops enic_netdev_ops = { #ifdef 
CONFIG_RFS_ACCEL .ndo_rx_flow_steer = enic_rx_flow_steer, #endif - .ndo_udp_tunnel_add = enic_udp_tunnel_add, - .ndo_udp_tunnel_del = enic_udp_tunnel_del, .ndo_features_check = enic_features_check, }; +static const struct netdev_stat_ops enic_netdev_stat_ops = { + .get_queue_stats_rx = enic_get_queue_stats_rx, + .get_queue_stats_tx = enic_get_queue_stats_tx, + .get_base_stats = enic_get_base_stats, +}; + +static void enic_free_enic_resources(struct enic *enic) +{ + kfree(enic->wq); + enic->wq = NULL; + + kfree(enic->rq); + enic->rq = NULL; + + kfree(enic->cq); + enic->cq = NULL; + + kfree(enic->napi); + enic->napi = NULL; + + kfree(enic->msix_entry); + enic->msix_entry = NULL; + + kfree(enic->msix); + enic->msix = NULL; + + kfree(enic->intr); + enic->intr = NULL; +} + +static int enic_alloc_enic_resources(struct enic *enic) +{ + enic->wq = kcalloc(enic->wq_avail, sizeof(struct enic_wq), GFP_KERNEL); + if (!enic->wq) + goto free_queues; + + enic->rq = kcalloc(enic->rq_avail, sizeof(struct enic_rq), GFP_KERNEL); + if (!enic->rq) + goto free_queues; + + enic->cq = kcalloc(enic->cq_avail, sizeof(struct vnic_cq), GFP_KERNEL); + if (!enic->cq) + goto free_queues; + + enic->napi = kcalloc(enic->wq_avail + enic->rq_avail, + sizeof(struct napi_struct), GFP_KERNEL); + if (!enic->napi) + goto free_queues; + + enic->msix_entry = kcalloc(enic->intr_avail, sizeof(struct msix_entry), + GFP_KERNEL); + if (!enic->msix_entry) + goto free_queues; + + enic->msix = kcalloc(enic->intr_avail, sizeof(struct enic_msix_entry), + GFP_KERNEL); + if (!enic->msix) + goto free_queues; + + enic->intr = kcalloc(enic->intr_avail, sizeof(struct vnic_intr), + GFP_KERNEL); + if (!enic->intr) + goto free_queues; + + return 0; + +free_queues: + enic_free_enic_resources(enic); + return -ENOMEM; +} + static void enic_dev_deinit(struct enic *enic) { unsigned int i; - for (i = 0; i < enic->rq_count; i++) { - napi_hash_del(&enic->napi[i]); - netif_napi_del(&enic->napi[i]); - } + for (i = 0; i < enic->rq_count; i++) + __netif_napi_del(&enic->napi[i]); + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) - netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); + __netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); + + /* observe RCU grace period after __netif_napi_del() calls */ + synchronize_net(); enic_free_vnic_resources(enic); enic_clear_intr_mode(enic); enic_free_affinity_hint(enic); -} - -static void enic_kdump_kernel_config(struct enic *enic) -{ - if (is_kdump_kernel()) { - dev_info(enic_get_dev(enic), "Running from within kdump kernel. 
Using minimal resources\n"); - enic->rq_count = 1; - enic->wq_count = 1; - enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS; - enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS; - enic->config.mtu = min_t(u16, 1500, enic->config.mtu); - } + enic_free_enic_resources(enic); } static int enic_dev_init(struct enic *enic) @@ -2585,19 +2544,28 @@ static int enic_dev_init(struct enic *enic) enic_get_res_counts(enic); - /* modify resource count if we are in kdump_kernel - */ - enic_kdump_kernel_config(enic); + enic_ext_cq(enic); - /* Set interrupt mode based on resource counts and system - * capabilities - */ + err = enic_alloc_enic_resources(enic); + if (err) { + dev_err(dev, "Failed to allocate enic resources\n"); + return err; + } + + /* Set interrupt mode based on system capabilities */ err = enic_set_intr_mode(enic); if (err) { dev_err(dev, "Failed to set intr mode based on resource " "counts and system capabilities, aborting\n"); - return err; + goto err_out_free_vnic_resources; + } + + /* Adjust resource counts based on most constrained resources */ + err = enic_adjust_resources(enic); + if (err) { + dev_err(dev, "Failed to adjust resources\n"); + goto err_out_free_vnic_resources; } /* Allocate and configure vNIC resources @@ -2619,16 +2587,17 @@ static int enic_dev_init(struct enic *enic) switch (vnic_dev_get_intr_mode(enic->vdev)) { default: - netif_napi_add(netdev, &enic->napi[0], enic_poll, 64); + netif_napi_add(netdev, &enic->napi[0], enic_poll); break; case VNIC_DEV_INTR_MODE_MSIX: for (i = 0; i < enic->rq_count; i++) { netif_napi_add(netdev, &enic->napi[i], - enic_poll_msix_rq, NAPI_POLL_WEIGHT); + enic_poll_msix_rq); } for (i = 0; i < enic->wq_count; i++) - netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)], - enic_poll_msix_wq, NAPI_POLL_WEIGHT); + netif_napi_add(netdev, + &enic->napi[enic_cq_wq(enic, i)], + enic_poll_msix_wq); break; } @@ -2638,6 +2607,7 @@ err_out_free_vnic_resources: enic_free_affinity_hint(enic); enic_clear_intr_mode(enic); enic_free_vnic_resources(enic); + enic_free_enic_resources(enic); return err; } @@ -2699,30 +2669,18 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); /* Query PCI controller on system for DMA addressing - * limitation for the device. Try 64-bit first, and + * limitation for the device. Try 47-bit first, and * fail to 32-bit. 
*/ - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(47)); if (err) { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { dev_err(dev, "No usable DMA configuration, aborting\n"); goto err_out_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) { - dev_err(dev, "Unable to obtain %u-bit DMA " - "for consistent allocations, aborting\n", 32); - goto err_out_release_regions; - } } else { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - dev_err(dev, "Unable to obtain %u-bit DMA " - "for consistent allocations, aborting\n", 64); - goto err_out_release_regions; - } using_dac = 1; } @@ -2846,23 +2804,20 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup notification timer, HW reset task, and wq locks */ - init_timer(&enic->notify_timer); - enic->notify_timer.function = enic_notify_timer; - enic->notify_timer.data = (unsigned long)enic; + timer_setup(&enic->notify_timer, enic_notify_timer, 0); - enic_set_rx_coal_setting(enic); + enic_rfs_flw_tbl_init(enic); INIT_WORK(&enic->reset, enic_reset); INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work); for (i = 0; i < enic->wq_count; i++) - spin_lock_init(&enic->wq_lock[i]); + spin_lock_init(&enic->wq[i].lock); /* Register net device */ enic->port_mtu = enic->config.mtu; - (void)enic_change_mtu(netdev, enic->port_mtu); err = enic_set_mac_addr(netdev, enic->mac_addr); if (err) { @@ -2880,6 +2835,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &enic_netdev_dynamic_ops; else netdev->netdev_ops = &enic_netdev_ops; + netdev->stat_ops = &enic_netdev_stat_ops; netdev->watchdog_timeo = 2 * HZ; enic_set_ethtool_ops(netdev); @@ -2902,9 +2858,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_RXCSUM; if (ENIC_SETTING(enic, VXLAN)) { u64 patch_level; + u64 a1 = 0; netdev->hw_enc_features |= NETIF_F_RXCSUM | NETIF_F_TSO | + NETIF_F_TSO6 | NETIF_F_TSO_ECN | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_HW_CSUM | @@ -2923,15 +2881,23 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ err = vnic_dev_get_supported_feature_ver(enic->vdev, VIC_FEATURE_VXLAN, - &patch_level); + &patch_level, &a1); if (err) patch_level = 0; + enic->vxlan.flags = (u8)a1; /* mask bits that are supported by driver */ patch_level &= BIT_ULL(0) | BIT_ULL(2); patch_level = fls(patch_level); patch_level = patch_level ? 
patch_level - 1 : 0; enic->vxlan.patch_level = patch_level; + + if (vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) == 1 || + enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ) { + netdev->udp_tunnel_nic_info = &enic_udp_tunnels_v4; + if (enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6) + netdev->udp_tunnel_nic_info = &enic_udp_tunnels; + } } netdev->features |= netdev->hw_features; @@ -2949,13 +2915,13 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* MTU range: 68 - 9000 */ netdev->min_mtu = ENIC_MIN_MTU; netdev->max_mtu = ENIC_MAX_MTU; + netdev->mtu = enic->port_mtu; err = register_netdev(netdev); if (err) { dev_err(dev, "Cannot register net device, aborting\n"); goto err_out_dev_deinit; } - enic->rx_copybreak = RX_COPYBREAK_DEFAULT; return 0; @@ -3020,17 +2986,4 @@ static struct pci_driver enic_driver = { .remove = enic_remove, }; -static int __init enic_init_module(void) -{ - pr_info("%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION); - - return pci_register_driver(&enic_driver); -} - -static void __exit enic_cleanup_module(void) -{ - pci_unregister_driver(&enic_driver); -} - -module_init(enic_init_module); -module_exit(enic_cleanup_module); +module_pci_driver(enic_driver); |
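
Note on the new RX buffer path: the enic_open() hunk above creates one page pool per receive queue (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV), but the helper it fills the ring with, enic_rq_alloc_buf(), has moved out of this file into the new enic_rq.c and is not shown in this diff. The following is only a minimal sketch of how a refill path typically looks once a queue owns such a pool; the function name, the pp_alloc_fail accounting and the idea of handing the page pointer to enic_queue_rq_desc() are illustrative assumptions, not the driver's actual code.

#include <net/page_pool/helpers.h>
/* driver headers (enic.h, enic_res.h, enic_rq.h) assumed to be included */

/* Illustrative sketch only -- not the real enic_rq_alloc_buf() from
 * enic_rq.c.  Shows the usual pattern for posting one RX buffer from a
 * page pool created with PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV.
 */
static int example_rq_alloc_buf(struct enic_rq *rq, unsigned int buf_len)
{
	struct page *page;
	dma_addr_t dma_addr;

	/* the page comes back refcounted and already DMA-mapped by the pool */
	page = page_pool_dev_alloc_pages(rq->pool);
	if (!page) {
		rq->stats.pp_alloc_fail++;
		return -ENOMEM;
	}

	dma_addr = page_pool_get_dma_addr(page);

	/* post the buffer on the hardware ring through the existing
	 * vnic_rq descriptor helper (os_buf_index 0: single buffer)
	 */
	enic_queue_rq_desc(&rq->vrq, page, 0, dma_addr, buf_len);

	return 0;
}

On the completion side, pages are either returned with page_pool_put_full_page() or left attached to the skb and recycled via skb_mark_for_recycle(), instead of going back to the page allocator.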
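
Likewise, enic_wq_service() and its buffer-free callback are deleted above and the NAPI handlers now call enic_wq_cq_service() from the new enic_wq.c, which this diff does not include. The sketch below is a rough reconstruction based only on the removed code, the new per-queue lock/stats fields and the DMA API conversion used elsewhere in the patch; the function names are placeholders and the real enic_wq.c may differ.

/* Rough sketch in the spirit of the removed enic_wq_service(); only the
 * vnic_* calls, the enic_wq lock/stats fields and ENIC_DESC_MAX_SPLITS are
 * taken from the driver, everything else is illustrative.
 */
static void example_free_wq_buf(struct vnic_wq *wq, struct cq_desc *cq_desc,
				struct vnic_wq_buf *buf, void *opaque)
{
	struct enic *enic = vnic_dev_priv(wq->vdev);

	if (buf->sop)
		dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
				 DMA_TO_DEVICE);
	else
		dma_unmap_page(&enic->pdev->dev, buf->dma_addr, buf->len,
			       DMA_TO_DEVICE);

	if (buf->os_buf)
		dev_kfree_skb_any(buf->os_buf);
}

static int example_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
			      u8 type, u16 q_number, u16 completed_index,
			      void *opaque)
{
	struct enic *enic = vnic_dev_priv(vdev);
	struct enic_wq *wq = &enic->wq[q_number];

	spin_lock(&wq->lock);

	/* unmap and free every skb covered by this completion index */
	vnic_wq_service(&wq->vwq, cq_desc, completed_index,
			example_free_wq_buf, opaque);

	/* wake the queue once enough descriptors are free again */
	if (netif_tx_queue_stopped(netdev_get_tx_queue(enic->netdev, q_number)) &&
	    vnic_wq_desc_avail(&wq->vwq) >=
	    (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)) {
		netif_wake_subqueue(enic->netdev, q_number);
		wq->stats.wake++;
	}

	spin_unlock(&wq->lock);

	return 0;
}

/* the NAPI handlers above call something shaped like this */
unsigned int example_wq_cq_service(struct enic *enic, unsigned int cq_index,
				   unsigned int work_to_do)
{
	return vnic_cq_service(&enic->cq[cq_index], work_to_do,
			       example_wq_service, NULL);
}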
