Diffstat (limited to 'drivers/net/ethernet/cisco/enic/enic_main.c')
-rw-r--r--	drivers/net/ethernet/cisco/enic/enic_main.c	| 2152
1 file changed, 1267 insertions(+), 885 deletions(-)
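The bulk of this diff converts enic to multi-queue TX/RX and adds adaptive RX interrupt coalescing driven by the mod_table[]/mod_range[] lookup tables introduced near the top of the file. As a rough, standalone sketch of the math the patch adds (hedged: calc_coal_timer() is a hypothetical helper name; the driver implements this inside enic_calc_int_moderation() using per-CQ state):

	/* Illustrative sketch of the adaptive RX coalescing math in this
	 * patch. Hypothetical helper; the real code lives in
	 * enic_calc_int_moderation() and uses per-CQ byte counters.
	 */
	static u32 calc_coal_timer(u64 rx_bytes, u64 delta_us,
				   u32 range_start, u32 range_end)
	{
		u64 traffic = rx_bytes << 3;	/* bytes -> bits */
		int i;

		/* bits / microseconds == megabits per second */
		traffic = delta_us > UINT_MAX ? 0 : traffic / (u32)delta_us;

		/* find the first table row whose rate exceeds the traffic */
		for (i = 0; i < ENIC_MAX_COALESCE_TIMERS; i++)
			if (traffic < mod_table[i].rx_rate)
				break;

		/* map the row's percentage into the link-speed timer range */
		return range_start + (range_end - range_start) *
				     mod_table[i].range_percent / 100;
	}

The driver additionally damps the result by averaging it with the previously programmed value before writing the interrupt timer, as seen in enic_calc_int_moderation() below.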
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 992ec2ee64d9..6bc8dfdb3d4b 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -31,7 +31,6 @@
 #include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
-#include <linux/ethtool.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
@@ -39,6 +38,15 @@
 #include <linux/rtnetlink.h>
 #include <linux/prefetch.h>
 #include <net/ip6_checksum.h>
+#include <linux/ktime.h>
+#include <linux/numa.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+#include <linux/crash_dump.h>
+#include <net/busy_poll.h>
+#include <net/vxlan.h>
+#include <net/netdev_queues.h>
 
 #include "cq_enet_desc.h"
 #include "vnic_dev.h"
@@ -49,18 +57,18 @@
 #include "enic.h"
 #include "enic_dev.h"
 #include "enic_pp.h"
+#include "enic_clsf.h"
+#include "enic_rq.h"
+#include "enic_wq.h"
 
 #define ENIC_NOTIFY_TIMER_PERIOD	(2 * HZ)
-#define WQ_ENET_MAX_DESC_LEN		(1 << WQ_ENET_LEN_BITS)
-#define MAX_TSO				(1 << 16)
-#define ENIC_DESC_MAX_SPLITS		(MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)
 
 #define PCI_DEVICE_ID_CISCO_VIC_ENET	0x0043	/* ethernet vnic */
 #define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN	0x0044	/* enet dynamic vnic */
 #define PCI_DEVICE_ID_CISCO_VIC_ENET_VF	0x0071	/* enet SRIOV VF */
 
 /* Supported devices */
-static DEFINE_PCI_DEVICE_TABLE(enic_id_table) = {
+static const struct pci_device_id enic_id_table[] = {
 	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
 	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
 	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) },
@@ -70,360 +78,270 @@ static DEFINE_PCI_DEVICE_TABLE(enic_id_table) = {
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
 MODULE_AUTHOR("Scott Feldman <scofeldm@cisco.com>");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, enic_id_table);
 
-struct enic_stat {
-	char name[ETH_GSTRING_LEN];
-	unsigned int offset;
-};
-
-#define ENIC_TX_STAT(stat)	\
-	{ .name = #stat, .offset = offsetof(struct vnic_tx_stats, stat) / 8 }
-#define ENIC_RX_STAT(stat)	\
-	{ .name = #stat, .offset = offsetof(struct vnic_rx_stats, stat) / 8 }
-
-static const struct enic_stat enic_tx_stats[] = {
-	ENIC_TX_STAT(tx_frames_ok),
-	ENIC_TX_STAT(tx_unicast_frames_ok),
-	ENIC_TX_STAT(tx_multicast_frames_ok),
-	ENIC_TX_STAT(tx_broadcast_frames_ok),
-	ENIC_TX_STAT(tx_bytes_ok),
-	ENIC_TX_STAT(tx_unicast_bytes_ok),
-	ENIC_TX_STAT(tx_multicast_bytes_ok),
-	ENIC_TX_STAT(tx_broadcast_bytes_ok),
-	ENIC_TX_STAT(tx_drops),
-	ENIC_TX_STAT(tx_errors),
-	ENIC_TX_STAT(tx_tso),
+#define ENIC_LARGE_PKT_THRESHOLD	1000
+#define ENIC_MAX_COALESCE_TIMERS	10
+/* Interrupt moderation table, which will be used to decide the
+ * coalescing timer values
+ * {rx_rate in Mbps, mapping percentage of the range}
+ */
+static struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
+	{4000, 0},
+	{4400, 10},
+	{5060, 20},
+	{5230, 30},
+	{5540, 40},
+	{5820, 50},
+	{6120, 60},
+	{6435, 70},
+	{6745, 80},
+	{7000, 90},
+	{0xFFFFFFFF, 100}
 };
 
-static const struct enic_stat enic_rx_stats[] = {
-	ENIC_RX_STAT(rx_frames_ok),
-	ENIC_RX_STAT(rx_frames_total),
-	ENIC_RX_STAT(rx_unicast_frames_ok),
-	ENIC_RX_STAT(rx_multicast_frames_ok),
-	ENIC_RX_STAT(rx_broadcast_frames_ok),
-	ENIC_RX_STAT(rx_bytes_ok),
-	ENIC_RX_STAT(rx_unicast_bytes_ok),
-	ENIC_RX_STAT(rx_multicast_bytes_ok),
-	ENIC_RX_STAT(rx_broadcast_bytes_ok),
-	ENIC_RX_STAT(rx_drop),
-	ENIC_RX_STAT(rx_no_bufs),
-	ENIC_RX_STAT(rx_errors),
-	ENIC_RX_STAT(rx_rss),
-	ENIC_RX_STAT(rx_crc_errors),
-	ENIC_RX_STAT(rx_frames_64),
-	ENIC_RX_STAT(rx_frames_127),
-	ENIC_RX_STAT(rx_frames_255),
-	ENIC_RX_STAT(rx_frames_511),
-	ENIC_RX_STAT(rx_frames_1023),
-	ENIC_RX_STAT(rx_frames_1518),
-	ENIC_RX_STAT(rx_frames_to_max),
+/* This table helps the driver to pick different ranges for rx coalescing
+ * timer depending on the link speed.
+ */
+static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
+	{0, 0}, /* 0 - 4 Gbps */
+	{0, 3}, /* 4 - 10 Gbps */
+	{3, 6}, /* 10+ Gbps */
 };
 
-static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
-static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
-
-int enic_is_dynamic(struct enic *enic)
-{
-	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
-}
-
-int enic_sriov_enabled(struct enic *enic)
-{
-	return (enic->priv_flags & ENIC_SRIOV_ENABLED) ? 1 : 0;
-}
-
-static int enic_is_sriov_vf(struct enic *enic)
-{
-	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
-}
-
-int enic_is_valid_vf(struct enic *enic, int vf)
+static void enic_init_affinity_hint(struct enic *enic)
 {
-#ifdef CONFIG_PCI_IOV
-	return vf >= 0 && vf < enic->num_vfs;
-#else
-	return 0;
-#endif
-}
+	int numa_node = dev_to_node(&enic->pdev->dev);
+	int i;
 
-static inline unsigned int enic_cq_rq(struct enic *enic, unsigned int rq)
-{
-	return rq;
-}
-
-static inline unsigned int enic_cq_wq(struct enic *enic, unsigned int wq)
-{
-	return enic->rq_count + wq;
-}
-
-static inline unsigned int enic_legacy_io_intr(void)
-{
-	return 0;
+	for (i = 0; i < enic->intr_count; i++) {
+		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) ||
+		    (cpumask_available(enic->msix[i].affinity_mask) &&
+		     !cpumask_empty(enic->msix[i].affinity_mask)))
+			continue;
+		if (zalloc_cpumask_var(&enic->msix[i].affinity_mask,
+				       GFP_KERNEL))
+			cpumask_set_cpu(cpumask_local_spread(i, numa_node),
+					enic->msix[i].affinity_mask);
+	}
 }
 
-static inline unsigned int enic_legacy_err_intr(void)
+static void enic_free_affinity_hint(struct enic *enic)
 {
-	return 1;
-}
+	int i;
 
-static inline unsigned int enic_legacy_notify_intr(void)
-{
-	return 2;
+	for (i = 0; i < enic->intr_count; i++) {
+		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i))
+			continue;
+		free_cpumask_var(enic->msix[i].affinity_mask);
+	}
 }
 
-static inline unsigned int enic_msix_rq_intr(struct enic *enic, unsigned int rq)
+static void enic_set_affinity_hint(struct enic *enic)
 {
-	return enic->cq[enic_cq_rq(enic, rq)].interrupt_offset;
-}
+	int i;
+	int err;
 
-static inline unsigned int enic_msix_wq_intr(struct enic *enic, unsigned int wq)
-{
-	return enic->cq[enic_cq_wq(enic, wq)].interrupt_offset;
-}
+	for (i = 0; i < enic->intr_count; i++) {
+		if (enic_is_err_intr(enic, i) ||
+		    enic_is_notify_intr(enic, i) ||
+		    !cpumask_available(enic->msix[i].affinity_mask) ||
+		    cpumask_empty(enic->msix[i].affinity_mask))
+			continue;
+		err = irq_update_affinity_hint(enic->msix_entry[i].vector,
+					       enic->msix[i].affinity_mask);
+		if (err)
+			netdev_warn(enic->netdev, "irq_update_affinity_hint failed, err %d\n",
+				    err);
+	}
 
-static inline unsigned int enic_msix_err_intr(struct enic *enic)
-{
-	return enic->rq_count + enic->wq_count;
-}
+	for (i = 0; i < enic->wq_count; i++) {
+		int wq_intr = enic_msix_wq_intr(enic, i);
 
-static inline unsigned int enic_msix_notify_intr(struct enic *enic)
-{
-	return enic->rq_count + enic->wq_count + 1;
+		if (cpumask_available(enic->msix[wq_intr].affinity_mask) &&
+		    !cpumask_empty(enic->msix[wq_intr].affinity_mask))
+			netif_set_xps_queue(enic->netdev,
+					    enic->msix[wq_intr].affinity_mask,
+					    i);
+	}
 }
 
-static int enic_get_settings(struct net_device *netdev,
-	struct ethtool_cmd *ecmd)
+static void enic_unset_affinity_hint(struct enic *enic)
 {
-	struct enic *enic = netdev_priv(netdev);
-
-	ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE);
-	ecmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE);
-	ecmd->port = PORT_FIBRE;
-	ecmd->transceiver = XCVR_EXTERNAL;
-
-	if (netif_carrier_ok(netdev)) {
-		ethtool_cmd_speed_set(ecmd, vnic_dev_port_speed(enic->vdev));
-		ecmd->duplex = DUPLEX_FULL;
-	} else {
-		ethtool_cmd_speed_set(ecmd, -1);
-		ecmd->duplex = -1;
-	}
-
-	ecmd->autoneg = AUTONEG_DISABLE;
+	int i;
 
-	return 0;
+	for (i = 0; i < enic->intr_count; i++)
+		irq_update_affinity_hint(enic->msix_entry[i].vector, NULL);
 }
 
-static void enic_get_drvinfo(struct net_device *netdev,
-	struct ethtool_drvinfo *drvinfo)
+static int enic_udp_tunnel_set_port(struct net_device *netdev,
+				    unsigned int table, unsigned int entry,
+				    struct udp_tunnel_info *ti)
 {
 	struct enic *enic = netdev_priv(netdev);
-	struct vnic_devcmd_fw_info *fw_info;
+	int err;
 
-	enic_dev_fw_info(enic, &fw_info);
+	spin_lock_bh(&enic->devcmd_lock);
 
-	strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
-	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
-	strlcpy(drvinfo->fw_version, fw_info->fw_version,
-		sizeof(drvinfo->fw_version));
-	strlcpy(drvinfo->bus_info, pci_name(enic->pdev),
-		sizeof(drvinfo->bus_info));
-}
+	err = vnic_dev_overlay_offload_cfg(enic->vdev,
+					   OVERLAY_CFG_VXLAN_PORT_UPDATE,
+					   ntohs(ti->port));
+	if (err)
+		goto error;
 
-static void enic_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
-{
-	unsigned int i;
+	err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
+					    enic->vxlan.patch_level);
+	if (err)
+		goto error;
 
-	switch (stringset) {
-	case ETH_SS_STATS:
-		for (i = 0; i < enic_n_tx_stats; i++) {
-			memcpy(data, enic_tx_stats[i].name, ETH_GSTRING_LEN);
-			data += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < enic_n_rx_stats; i++) {
-			memcpy(data, enic_rx_stats[i].name, ETH_GSTRING_LEN);
-			data += ETH_GSTRING_LEN;
-		}
-		break;
-	}
-}
+	enic->vxlan.vxlan_udp_port_number = ntohs(ti->port);
+error:
+	spin_unlock_bh(&enic->devcmd_lock);
 
-static int enic_get_sset_count(struct net_device *netdev, int sset)
-{
-	switch (sset) {
-	case ETH_SS_STATS:
-		return enic_n_tx_stats + enic_n_rx_stats;
-	default:
-		return -EOPNOTSUPP;
-	}
+	return err;
 }
 
-static void enic_get_ethtool_stats(struct net_device *netdev,
-	struct ethtool_stats *stats, u64 *data)
+static int enic_udp_tunnel_unset_port(struct net_device *netdev,
+				      unsigned int table, unsigned int entry,
+				      struct udp_tunnel_info *ti)
 {
 	struct enic *enic = netdev_priv(netdev);
-	struct vnic_stats *vstats;
-	unsigned int i;
-
-	enic_dev_stats_dump(enic, &vstats);
+	int err;
 
-	for (i = 0; i < enic_n_tx_stats; i++)
-		*(data++) = ((u64 *)&vstats->tx)[enic_tx_stats[i].offset];
-	for (i = 0; i < enic_n_rx_stats; i++)
-		*(data++) = ((u64 *)&vstats->rx)[enic_rx_stats[i].offset];
-}
+	spin_lock_bh(&enic->devcmd_lock);
 
-static u32 enic_get_msglevel(struct net_device *netdev)
-{
-	struct enic *enic = netdev_priv(netdev);
-	return enic->msg_enable;
-}
+	err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
+					    OVERLAY_OFFLOAD_DISABLE);
+	if (err)
+		goto unlock;
 
-static void enic_set_msglevel(struct net_device *netdev, u32 value)
-{
-	struct enic *enic = netdev_priv(netdev);
-	enic->msg_enable = value;
-}
+	enic->vxlan.vxlan_udp_port_number = 0;
 
-static int enic_get_coalesce(struct net_device *netdev,
-	struct ethtool_coalesce *ecmd)
-{
-	struct enic *enic = netdev_priv(netdev);
+unlock:
+	spin_unlock_bh(&enic->devcmd_lock);
 
-	ecmd->tx_coalesce_usecs = enic->tx_coalesce_usecs;
-	ecmd->rx_coalesce_usecs = enic->rx_coalesce_usecs;
-
-	return 0;
+	return err;
 }
 
-static int enic_set_coalesce(struct net_device *netdev,
-	struct ethtool_coalesce *ecmd)
+static const struct udp_tunnel_nic_info enic_udp_tunnels = {
+	.set_port	= enic_udp_tunnel_set_port,
+	.unset_port	= enic_udp_tunnel_unset_port,
+	.tables		= {
+		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
+	},
+}, enic_udp_tunnels_v4 = {
+	.set_port	= enic_udp_tunnel_set_port,
+	.unset_port	= enic_udp_tunnel_unset_port,
+	.flags		= UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
+	.tables		= {
+		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
+	},
+};
+
+static netdev_features_t enic_features_check(struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
 {
-	struct enic *enic = netdev_priv(netdev);
-	u32 tx_coalesce_usecs;
-	u32 rx_coalesce_usecs;
-	unsigned int i, intr;
+	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
+	struct enic *enic = netdev_priv(dev);
+	struct udphdr *udph;
+	u16 port = 0;
+	u8 proto;
 
-	tx_coalesce_usecs = min_t(u32, ecmd->tx_coalesce_usecs,
-		vnic_dev_get_intr_coal_timer_max(enic->vdev));
-	rx_coalesce_usecs = min_t(u32, ecmd->rx_coalesce_usecs,
-		vnic_dev_get_intr_coal_timer_max(enic->vdev));
+	if (!skb->encapsulation)
+		return features;
 
-	switch (vnic_dev_get_intr_mode(enic->vdev)) {
-	case VNIC_DEV_INTR_MODE_INTX:
-		if (tx_coalesce_usecs != rx_coalesce_usecs)
-			return -EINVAL;
+	features = vxlan_features_check(skb, features);
 
-		intr = enic_legacy_io_intr();
-		vnic_intr_coalescing_timer_set(&enic->intr[intr],
-			tx_coalesce_usecs);
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IPV6):
+		if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
+			goto out;
+		proto = ipv6_hdr(skb)->nexthdr;
 		break;
-	case VNIC_DEV_INTR_MODE_MSI:
-		if (tx_coalesce_usecs != rx_coalesce_usecs)
-			return -EINVAL;
-
-		vnic_intr_coalescing_timer_set(&enic->intr[0],
-			tx_coalesce_usecs);
+	case htons(ETH_P_IP):
+		proto = ip_hdr(skb)->protocol;
 		break;
-	case VNIC_DEV_INTR_MODE_MSIX:
-		for (i = 0; i < enic->wq_count; i++) {
-			intr = enic_msix_wq_intr(enic, i);
-			vnic_intr_coalescing_timer_set(&enic->intr[intr],
-				tx_coalesce_usecs);
-		}
-
-		for (i = 0; i < enic->rq_count; i++) {
-			intr = enic_msix_rq_intr(enic, i);
-			vnic_intr_coalescing_timer_set(&enic->intr[intr],
-				rx_coalesce_usecs);
-		}
+	default:
+		goto out;
+	}
 
+	switch (eth->h_proto) {
+	case ntohs(ETH_P_IPV6):
+		if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
+			goto out;
+		fallthrough;
+	case ntohs(ETH_P_IP):
 		break;
 	default:
-		break;
+		goto out;
 	}
 
-	enic->tx_coalesce_usecs = tx_coalesce_usecs;
-	enic->rx_coalesce_usecs = rx_coalesce_usecs;
-	return 0;
-}
+	if (proto == IPPROTO_UDP) {
+		udph = udp_hdr(skb);
+		port = be16_to_cpu(udph->dest);
+	}
 
-static const struct ethtool_ops enic_ethtool_ops = {
-	.get_settings = enic_get_settings,
-	.get_drvinfo = enic_get_drvinfo,
-	.get_msglevel = enic_get_msglevel,
-	.set_msglevel = enic_set_msglevel,
-	.get_link = ethtool_op_get_link,
-	.get_strings = enic_get_strings,
-	.get_sset_count = enic_get_sset_count,
-	.get_ethtool_stats = enic_get_ethtool_stats,
-	.get_coalesce = enic_get_coalesce,
-	.set_coalesce = enic_set_coalesce,
-};
+	/* HW supports offload of only one UDP port. Remove CSUM and GSO MASK
+	 * for other UDP port tunnels
+	 */
+	if (port != enic->vxlan.vxlan_udp_port_number)
+		goto out;
 
-static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
-{
-	struct enic *enic = vnic_dev_priv(wq->vdev);
+	return features;
 
-	if (buf->sop)
-		pci_unmap_single(enic->pdev, buf->dma_addr,
-			buf->len, PCI_DMA_TODEVICE);
-	else
-		pci_unmap_page(enic->pdev, buf->dma_addr,
-			buf->len, PCI_DMA_TODEVICE);
-
-	if (buf->os_buf)
-		dev_kfree_skb_any(buf->os_buf);
+out:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
-static void enic_wq_free_buf(struct vnic_wq *wq,
-	struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque)
+int enic_is_dynamic(struct enic *enic)
 {
-	enic_free_wq_buf(wq, buf);
+	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
 }
 
-static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
-	u8 type, u16 q_number, u16 completed_index, void *opaque)
+int enic_sriov_enabled(struct enic *enic)
 {
-	struct enic *enic = vnic_dev_priv(vdev);
-
-	spin_lock(&enic->wq_lock[q_number]);
-
-	vnic_wq_service(&enic->wq[q_number], cq_desc,
-		completed_index, enic_wq_free_buf,
-		opaque);
-
-	if (netif_queue_stopped(enic->netdev) &&
-	    vnic_wq_desc_avail(&enic->wq[q_number]) >=
-	    (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS))
-		netif_wake_queue(enic->netdev);
+	return (enic->priv_flags & ENIC_SRIOV_ENABLED) ? 1 : 0;
+}
 
-	spin_unlock(&enic->wq_lock[q_number]);
+static int enic_is_sriov_vf(struct enic *enic)
+{
+	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
+}
 
+int enic_is_valid_vf(struct enic *enic, int vf)
+{
+#ifdef CONFIG_PCI_IOV
+	return vf >= 0 && vf < enic->num_vfs;
+#else
 	return 0;
+#endif
 }
 
-static void enic_log_q_error(struct enic *enic)
+static bool enic_log_q_error(struct enic *enic)
 {
 	unsigned int i;
 	u32 error_status;
+	bool err = false;
 
 	for (i = 0; i < enic->wq_count; i++) {
-		error_status = vnic_wq_error_status(&enic->wq[i]);
+		error_status = vnic_wq_error_status(&enic->wq[i].vwq);
+		err |= error_status;
 		if (error_status)
 			netdev_err(enic->netdev, "WQ[%d] error_status %d\n",
 				i, error_status);
 	}
 
 	for (i = 0; i < enic->rq_count; i++) {
-		error_status = vnic_rq_error_status(&enic->rq[i]);
+		error_status = vnic_rq_error_status(&enic->rq[i].vrq);
+		err |= error_status;
 		if (error_status)
 			netdev_err(enic->netdev, "RQ[%d] error_status %d\n",
 				i, error_status);
 	}
+
+	return err;
 }
 
 static void enic_msglvl_check(struct enic *enic)
@@ -459,6 +377,36 @@ static void enic_mtu_check(struct enic *enic)
 	}
 }
 
+static void enic_set_rx_coal_setting(struct enic *enic)
+{
+	unsigned int speed;
+	int index = -1;
+	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+
+	/* 1. Read the link speed from fw
+	 * 2. Pick the default range for the speed
+	 * 3. Update it in enic->rx_coalesce_setting
+	 */
+	speed = vnic_dev_port_speed(enic->vdev);
+	if (speed > ENIC_LINK_SPEED_10G)
+		index = ENIC_LINK_40G_INDEX;
+	else if (speed > ENIC_LINK_SPEED_4G)
+		index = ENIC_LINK_10G_INDEX;
+	else
+		index = ENIC_LINK_4G_INDEX;
+
+	rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
+	rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
+	rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;
+
+	/* Start with the value provided by UCSM */
+	for (index = 0; index < enic->rq_count; index++)
+		enic->cq[index].cur_rx_coal_timeval =
+			enic->config.intr_timer_usec;
+
+	rx_coal->use_adaptive_rx_coalesce = 1;
+}
+
 static void enic_link_check(struct enic *enic)
 {
 	int link_status = vnic_dev_link_status(enic->vdev);
@@ -467,6 +415,7 @@ static void enic_link_check(struct enic *enic)
 	if (link_status && !carrier_ok) {
 		netdev_info(enic->netdev, "Link UP\n");
 		netif_carrier_on(enic->netdev);
+		enic_set_rx_coal_setting(enic);
 	} else if (!link_status && carrier_ok) {
 		netdev_info(enic->netdev, "Link DOWN\n");
 		netif_carrier_off(enic->netdev);
@@ -486,9 +435,9 @@ static irqreturn_t enic_isr_legacy(int irq, void *data)
 {
 	struct net_device *netdev = data;
 	struct enic *enic = netdev_priv(netdev);
-	unsigned int io_intr = enic_legacy_io_intr();
-	unsigned int err_intr = enic_legacy_err_intr();
-	unsigned int notify_intr = enic_legacy_notify_intr();
+	unsigned int io_intr = ENIC_LEGACY_IO_INTR;
+	unsigned int err_intr = ENIC_LEGACY_ERR_INTR;
+	unsigned int notify_intr = ENIC_LEGACY_NOTIFY_INTR;
 	u32 pba;
 
 	vnic_intr_mask(&enic->intr[io_intr]);
@@ -500,8 +449,8 @@ static irqreturn_t enic_isr_legacy(int irq, void *data)
 	}
 
 	if (ENIC_TEST_INTR(pba, notify_intr)) {
-		vnic_intr_return_all_credits(&enic->intr[notify_intr]);
 		enic_notify_check(enic);
+		vnic_intr_return_all_credits(&enic->intr[notify_intr]);
 	}
 
 	if (ENIC_TEST_INTR(pba, err_intr)) {
@@ -512,12 +461,10 @@ static irqreturn_t enic_isr_legacy(int irq, void *data)
 		return IRQ_HANDLED;
 	}
 
-	if (ENIC_TEST_INTR(pba, io_intr)) {
-		if (napi_schedule_prep(&enic->napi[0]))
-			__napi_schedule(&enic->napi[0]);
-	} else {
+	if (ENIC_TEST_INTR(pba, io_intr))
+		napi_schedule_irqoff(&enic->napi[0]);
+	else
 		vnic_intr_unmask(&enic->intr[io_intr]);
-	}
 
 	return IRQ_HANDLED;
 }
@@ -542,36 +489,16 @@ static irqreturn_t enic_isr_msi(int irq, void *data)
 	 * writes).
 	 */
 
-	napi_schedule(&enic->napi[0]);
+	napi_schedule_irqoff(&enic->napi[0]);
 
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t enic_isr_msix_rq(int irq, void *data)
+static irqreturn_t enic_isr_msix(int irq, void *data)
 {
 	struct napi_struct *napi = data;
 
-	/* schedule NAPI polling for RQ cleanup */
-	napi_schedule(napi);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t enic_isr_msix_wq(int irq, void *data)
-{
-	struct enic *enic = data;
-	unsigned int cq = enic_cq_wq(enic, 0);
-	unsigned int intr = enic_msix_wq_intr(enic, 0);
-	unsigned int wq_work_to_do = -1; /* no limit */
-	unsigned int wq_work_done;
-
-	wq_work_done = vnic_cq_service(&enic->cq[cq],
-		wq_work_to_do, enic_wq_service, NULL);
-
-	vnic_intr_return_credits(&enic->intr[intr],
-		wq_work_done,
-		1 /* unmask intr */,
-		1 /* reset intr timer */);
+	napi_schedule_irqoff(napi);
 
 	return IRQ_HANDLED;
 }
@@ -583,10 +510,9 @@ static irqreturn_t enic_isr_msix_err(int irq, void *data)
 
 	vnic_intr_return_all_credits(&enic->intr[intr]);
 
-	enic_log_q_error(enic);
-
-	/* schedule recovery from WQ/RQ error */
-	schedule_work(&enic->reset);
+	if (enic_log_q_error(enic))
+		/* schedule recovery from WQ/RQ error */
+		schedule_work(&enic->reset);
 
 	return IRQ_HANDLED;
 }
@@ -596,96 +522,131 @@ static irqreturn_t enic_isr_msix_notify(int irq, void *data)
 	struct enic *enic = data;
 	unsigned int intr = enic_msix_notify_intr(enic);
 
-	vnic_intr_return_all_credits(&enic->intr[intr]);
 	enic_notify_check(enic);
+	vnic_intr_return_all_credits(&enic->intr[intr]);
 
 	return IRQ_HANDLED;
 }
 
-static inline void enic_queue_wq_skb_cont(struct enic *enic,
-	struct vnic_wq *wq, struct sk_buff *skb,
-	unsigned int len_left, int loopback)
+static int enic_queue_wq_skb_cont(struct enic *enic, struct vnic_wq *wq,
+				  struct sk_buff *skb, unsigned int len_left,
+				  int loopback)
 {
 	const skb_frag_t *frag;
+	dma_addr_t dma_addr;
 
 	/* Queue additional data fragments */
 	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
 		len_left -= skb_frag_size(frag);
-		enic_queue_wq_desc_cont(wq, skb,
-			skb_frag_dma_map(&enic->pdev->dev,
-					 frag, 0, skb_frag_size(frag),
-					 DMA_TO_DEVICE),
-			skb_frag_size(frag),
-			(len_left == 0),	/* EOP? */
-			loopback);
+		dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag, 0,
+					    skb_frag_size(frag),
+					    DMA_TO_DEVICE);
+		if (unlikely(enic_dma_map_check(enic, dma_addr)))
+			return -ENOMEM;
+		enic_queue_wq_desc_cont(wq, skb, dma_addr, skb_frag_size(frag),
+					(len_left == 0),	/* EOP? */
+					loopback);
 	}
+
+	return 0;
 }
 
-static inline void enic_queue_wq_skb_vlan(struct enic *enic,
-	struct vnic_wq *wq, struct sk_buff *skb,
-	int vlan_tag_insert, unsigned int vlan_tag, int loopback)
+static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq,
				  struct sk_buff *skb, int vlan_tag_insert,
				  unsigned int vlan_tag, int loopback)
 {
 	unsigned int head_len = skb_headlen(skb);
 	unsigned int len_left = skb->len - head_len;
 	int eop = (len_left == 0);
+	dma_addr_t dma_addr;
+	int err = 0;
+
+	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
+				  DMA_TO_DEVICE);
+	if (unlikely(enic_dma_map_check(enic, dma_addr)))
+		return -ENOMEM;
 
 	/* Queue the main skb fragment. The fragments are no larger
 	 * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
 	 * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
 	 * per fragment is queued.
 	 */
-	enic_queue_wq_desc(wq, skb,
-		pci_map_single(enic->pdev, skb->data,
-			head_len, PCI_DMA_TODEVICE),
-		head_len,
-		vlan_tag_insert, vlan_tag,
-		eop, loopback);
+	enic_queue_wq_desc(wq, skb, dma_addr, head_len, vlan_tag_insert,
			   vlan_tag, eop, loopback);
 
 	if (!eop)
-		enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
+		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
+
+	/* The enic_queue_wq_desc() above does not do HW checksum */
+	enic->wq[wq->index].stats.csum_none++;
+	enic->wq[wq->index].stats.packets++;
+	enic->wq[wq->index].stats.bytes += skb->len;
+
+	return err;
 }
 
-static inline void enic_queue_wq_skb_csum_l4(struct enic *enic,
-	struct vnic_wq *wq, struct sk_buff *skb,
-	int vlan_tag_insert, unsigned int vlan_tag, int loopback)
+static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
				     struct sk_buff *skb, int vlan_tag_insert,
				     unsigned int vlan_tag, int loopback)
 {
 	unsigned int head_len = skb_headlen(skb);
 	unsigned int len_left = skb->len - head_len;
 	unsigned int hdr_len = skb_checksum_start_offset(skb);
 	unsigned int csum_offset = hdr_len + skb->csum_offset;
 	int eop = (len_left == 0);
+	dma_addr_t dma_addr;
+	int err = 0;
+
+	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
+				  DMA_TO_DEVICE);
+	if (unlikely(enic_dma_map_check(enic, dma_addr)))
+		return -ENOMEM;
 
 	/* Queue the main skb fragment. The fragments are no larger
 	 * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
 	 * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
 	 * per fragment is queued.
 	 */
-	enic_queue_wq_desc_csum_l4(wq, skb,
-		pci_map_single(enic->pdev, skb->data,
-			head_len, PCI_DMA_TODEVICE),
-		head_len,
-		csum_offset,
-		hdr_len,
-		vlan_tag_insert, vlan_tag,
-		eop, loopback);
+	enic_queue_wq_desc_csum_l4(wq, skb, dma_addr, head_len, csum_offset,
				   hdr_len, vlan_tag_insert, vlan_tag, eop,
				   loopback);
 
 	if (!eop)
-		enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
+		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
+
+	enic->wq[wq->index].stats.csum_partial++;
+	enic->wq[wq->index].stats.packets++;
+	enic->wq[wq->index].stats.bytes += skb->len;
+
+	return err;
 }
 
-static inline void enic_queue_wq_skb_tso(struct enic *enic,
-	struct vnic_wq *wq, struct sk_buff *skb, unsigned int mss,
-	int vlan_tag_insert, unsigned int vlan_tag, int loopback)
+static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
 {
-	unsigned int frag_len_left = skb_headlen(skb);
-	unsigned int len_left = skb->len - frag_len_left;
-	unsigned int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
-	int eop = (len_left == 0);
-	unsigned int len;
-	dma_addr_t dma_addr;
-	unsigned int offset = 0;
-	skb_frag_t *frag;
+	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
 
+	switch (eth->h_proto) {
+	case ntohs(ETH_P_IP):
+		inner_ip_hdr(skb)->check = 0;
+		inner_tcp_hdr(skb)->check =
+			~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
+					   inner_ip_hdr(skb)->daddr, 0,
+					   IPPROTO_TCP, 0);
+		break;
+	case ntohs(ETH_P_IPV6):
+		inner_tcp_hdr(skb)->check =
+			~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+					 &inner_ipv6_hdr(skb)->daddr, 0,
+					 IPPROTO_TCP, 0);
+		break;
+	default:
+		WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
+		break;
+	}
+}
+
+static void enic_preload_tcp_csum(struct sk_buff *skb)
+{
 	/* Preload TCP csum field with IP pseudo hdr calculated
 	 * with IP length set to zero. HW will later add in length
 	 * to each TCP segment resulting from the TSO.
@@ -696,8 +657,33 @@ static inline void enic_queue_wq_skb_tso(struct enic *enic,
 		tcp_hdr(skb)->check = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
 			ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
 	} else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
-		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-			&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+		tcp_v6_gso_csum_prep(skb);
+	}
+}
+
+static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
				 struct sk_buff *skb, unsigned int mss,
				 int vlan_tag_insert, unsigned int vlan_tag,
				 int loopback)
+{
+	unsigned int frag_len_left = skb_headlen(skb);
+	unsigned int len_left = skb->len - frag_len_left;
+	int eop = (len_left == 0);
+	unsigned int offset = 0;
+	unsigned int hdr_len;
+	dma_addr_t dma_addr;
+	unsigned int pkts;
+	unsigned int len;
+	skb_frag_t *frag;
+
+	if (skb->encapsulation) {
+		hdr_len = skb_inner_tcp_all_headers(skb);
+		enic_preload_tcp_csum_encap(skb);
+		enic->wq[wq->index].stats.encap_tso++;
+	} else {
+		hdr_len = skb_tcp_all_headers(skb);
+		enic_preload_tcp_csum(skb);
+		enic->wq[wq->index].stats.tso++;
 	}
 
 	/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
@@ -705,20 +691,20 @@ static inline void enic_queue_wq_skb_tso(struct enic *enic,
 	 */
 	while (frag_len_left) {
 		len = min(frag_len_left, (unsigned int)WQ_ENET_MAX_DESC_LEN);
-		dma_addr = pci_map_single(enic->pdev, skb->data + offset,
-			len, PCI_DMA_TODEVICE);
-		enic_queue_wq_desc_tso(wq, skb,
-			dma_addr,
-			len,
-			mss, hdr_len,
-			vlan_tag_insert, vlan_tag,
-			eop && (len == frag_len_left), loopback);
+		dma_addr = dma_map_single(&enic->pdev->dev,
+					  skb->data + offset, len,
+					  DMA_TO_DEVICE);
+		if (unlikely(enic_dma_map_check(enic, dma_addr)))
+			return -ENOMEM;
+		enic_queue_wq_desc_tso(wq, skb, dma_addr, len, mss, hdr_len,
				       vlan_tag_insert, vlan_tag,
				       eop && (len == frag_len_left), loopback);
 		frag_len_left -= len;
 		offset += len;
 	}
 
 	if (eop)
-		return;
+		goto tso_out_stats;
 
 	/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
 	 * for additional data fragments
@@ -734,44 +720,113 @@ static inline void enic_queue_wq_skb_tso(struct enic *enic,
 			dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag,
 						    offset, len,
 						    DMA_TO_DEVICE);
-			enic_queue_wq_desc_cont(wq, skb,
-				dma_addr,
-				len,
-				(len_left == 0) &&
-				(len == frag_len_left),	/* EOP? */
-				loopback);
+			if (unlikely(enic_dma_map_check(enic, dma_addr)))
+				return -ENOMEM;
+			enic_queue_wq_desc_cont(wq, skb, dma_addr, len,
						(len_left == 0) &&
						 (len == frag_len_left),/*EOP*/
						loopback);
 			frag_len_left -= len;
 			offset += len;
 		}
 	}
+
+tso_out_stats:
+	/* calculate how many packets tso sent */
+	len = skb->len - hdr_len;
+	pkts = len / mss;
+	if ((len % mss) > 0)
+		pkts++;
+	enic->wq[wq->index].stats.packets += pkts;
+	enic->wq[wq->index].stats.bytes += (len + (pkts * hdr_len));
+
+	return 0;
 }
 
-static inline void enic_queue_wq_skb(struct enic *enic,
+static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq,
					  struct sk_buff *skb,
					  int vlan_tag_insert,
					  unsigned int vlan_tag, int loopback)
+{
+	unsigned int head_len = skb_headlen(skb);
+	unsigned int len_left = skb->len - head_len;
+	/* Hardware will overwrite the checksum fields, calculating from
+	 * scratch and ignoring the value placed by software.
+	 * Offload mode = 00
+	 * mss[2], mss[1], mss[0] bits are set
+	 */
+	unsigned int mss_or_csum = 7;
+	int eop = (len_left == 0);
+	dma_addr_t dma_addr;
+	int err = 0;
+
+	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
				  DMA_TO_DEVICE);
+	if (unlikely(enic_dma_map_check(enic, dma_addr)))
+		return -ENOMEM;
+
+	enic_queue_wq_desc_ex(wq, skb, dma_addr, head_len, mss_or_csum, 0,
			      vlan_tag_insert, vlan_tag,
			      WQ_ENET_OFFLOAD_MODE_CSUM, eop, 1 /* SOP */, eop,
			      loopback);
+	if (!eop)
+		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
+
+	enic->wq[wq->index].stats.encap_csum++;
+	enic->wq[wq->index].stats.packets++;
+	enic->wq[wq->index].stats.bytes += skb->len;
+
+	return err;
+}
+
+static inline int enic_queue_wq_skb(struct enic *enic,
 	struct vnic_wq *wq, struct sk_buff *skb)
 {
 	unsigned int mss = skb_shinfo(skb)->gso_size;
 	unsigned int vlan_tag = 0;
 	int vlan_tag_insert = 0;
 	int loopback = 0;
+	int err;
 
-	if (vlan_tx_tag_present(skb)) {
+	if (skb_vlan_tag_present(skb)) {
 		/* VLAN tag from trunking driver */
 		vlan_tag_insert = 1;
-		vlan_tag = vlan_tx_tag_get(skb);
+		vlan_tag = skb_vlan_tag_get(skb);
+		enic->wq[wq->index].stats.add_vlan++;
 	} else if (enic->loop_enable) {
 		vlan_tag = enic->loop_tag;
 		loopback = 1;
 	}
 
 	if (mss)
-		enic_queue_wq_skb_tso(enic, wq, skb, mss,
-			vlan_tag_insert, vlan_tag, loopback);
-	else if (skb->ip_summed == CHECKSUM_PARTIAL)
-		enic_queue_wq_skb_csum_l4(enic, wq, skb,
-			vlan_tag_insert, vlan_tag, loopback);
+		err = enic_queue_wq_skb_tso(enic, wq, skb, mss,
					    vlan_tag_insert, vlan_tag,
					    loopback);
+	else if (skb->encapsulation)
+		err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert,
					      vlan_tag, loopback);
+	else if (skb->ip_summed == CHECKSUM_PARTIAL)
+		err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert,
						vlan_tag, loopback);
 	else
-		enic_queue_wq_skb_vlan(enic, wq, skb,
-			vlan_tag_insert, vlan_tag, loopback);
+		err = enic_queue_wq_skb_vlan(enic, wq, skb, vlan_tag_insert,
					     vlan_tag, loopback);
+	if (unlikely(err)) {
+		struct vnic_wq_buf *buf;
+
+		buf = wq->to_use->prev;
+		/* while not EOP of previous pkt && queue not empty.
+		 * For all non EOP bufs, os_buf is NULL.
+		 */
+		while (!buf->os_buf && (buf->next != wq->to_clean)) {
+			enic_free_wq_buf(wq, buf);
+			wq->ring.desc_avail++;
+			buf = buf->prev;
+		}
+		wq->to_use = buf->next;
+		dev_kfree_skb(skb);
+	}
+	return err;
 }
 
 /* netif_tx_lock held, process context with BHs disabled, or BH */
@@ -779,14 +834,21 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
 	struct net_device *netdev)
 {
 	struct enic *enic = netdev_priv(netdev);
-	struct vnic_wq *wq = &enic->wq[0];
-	unsigned long flags;
+	struct vnic_wq *wq;
+	unsigned int txq_map;
+	struct netdev_queue *txq;
+
+	txq_map = skb_get_queue_mapping(skb) % enic->wq_count;
+	wq = &enic->wq[txq_map].vwq;
 
 	if (skb->len <= 0) {
-		dev_kfree_skb(skb);
+		dev_kfree_skb_any(skb);
+		enic->wq[wq->index].stats.null_pkt++;
 		return NETDEV_TX_OK;
 	}
 
+	txq = netdev_get_tx_queue(netdev, txq_map);
+
 	/* Non-TSO sends must fit within ENIC_NON_TSO_MAX_DESC descs,
 	 * which is very likely.  In the off chance it's going to take
 	 * more than * ENIC_NON_TSO_MAX_DESC, linearize the skb.
@@ -795,39 +857,58 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
 	if (skb_shinfo(skb)->gso_size == 0 &&
 	    skb_shinfo(skb)->nr_frags + 1 > ENIC_NON_TSO_MAX_DESC &&
 	    skb_linearize(skb)) {
-		dev_kfree_skb(skb);
+		dev_kfree_skb_any(skb);
+		enic->wq[wq->index].stats.skb_linear_fail++;
 		return NETDEV_TX_OK;
 	}
 
-	spin_lock_irqsave(&enic->wq_lock[0], flags);
+	spin_lock(&enic->wq[txq_map].lock);
 
 	if (vnic_wq_desc_avail(wq) <
 	    skb_shinfo(skb)->nr_frags + ENIC_DESC_MAX_SPLITS) {
-		netif_stop_queue(netdev);
+		netif_tx_stop_queue(txq);
 		/* This is a hard error, log it */
 		netdev_err(netdev, "BUG! Tx ring full when queue awake!\n");
-		spin_unlock_irqrestore(&enic->wq_lock[0], flags);
+		spin_unlock(&enic->wq[txq_map].lock);
+		enic->wq[wq->index].stats.desc_full_awake++;
 		return NETDEV_TX_BUSY;
 	}
 
-	enic_queue_wq_skb(enic, wq, skb);
+	if (enic_queue_wq_skb(enic, wq, skb))
+		goto error;
 
-	if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)
-		netif_stop_queue(netdev);
+	if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS) {
+		netif_tx_stop_queue(txq);
+		enic->wq[wq->index].stats.stopped++;
+	}
+	skb_tx_timestamp(skb);
+	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
+		vnic_wq_doorbell(wq);
 
-	spin_unlock_irqrestore(&enic->wq_lock[0], flags);
+error:
+	spin_unlock(&enic->wq[txq_map].lock);
 
 	return NETDEV_TX_OK;
 }
 
-/* dev_base_lock rwlock held, nominally process context */
-static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev,
-	struct rtnl_link_stats64 *net_stats)
+/* rcu_read_lock potentially held, nominally process context */
+static void enic_get_stats(struct net_device *netdev,
			   struct rtnl_link_stats64 *net_stats)
 {
 	struct enic *enic = netdev_priv(netdev);
 	struct vnic_stats *stats;
+	u64 pkt_truncated = 0;
+	u64 bad_fcs = 0;
+	int err;
+	int i;
 
-	enic_dev_stats_dump(enic, &stats);
+	err = enic_dev_stats_dump(enic, &stats);
+	/* return only when dma_alloc_coherent fails in vnic_dev_stats_dump
	 * For other failures, like devcmd failure, we return previously
	 * recorded stats.
	 */
+	if (err == -ENOMEM)
+		return;
 
 	net_stats->tx_packets = stats->tx.tx_frames_ok;
 	net_stats->tx_bytes = stats->tx.tx_bytes_ok;
@@ -838,15 +919,85 @@ static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev,
 	net_stats->rx_bytes = stats->rx.rx_bytes_ok;
 	net_stats->rx_errors = stats->rx.rx_errors;
 	net_stats->multicast = stats->rx.rx_multicast_frames_ok;
-	net_stats->rx_over_errors = enic->rq_truncated_pkts;
-	net_stats->rx_crc_errors = enic->rq_bad_fcs;
+
+	for (i = 0; i < enic->rq_count; i++) {
+		struct enic_rq_stats *rqs = &enic->rq[i].stats;
+
+		if (!enic->rq[i].vrq.ctrl)
+			break;
+		pkt_truncated += rqs->pkt_truncated;
+		bad_fcs += rqs->bad_fcs;
+	}
+	net_stats->rx_over_errors = pkt_truncated;
+	net_stats->rx_crc_errors = bad_fcs;
 	net_stats->rx_dropped = stats->rx.rx_no_bufs + stats->rx.rx_drop;
+}
+
+static int enic_mc_sync(struct net_device *netdev, const u8 *mc_addr)
+{
+	struct enic *enic = netdev_priv(netdev);
+
+	if (enic->mc_count == ENIC_MULTICAST_PERFECT_FILTERS) {
+		unsigned int mc_count = netdev_mc_count(netdev);
+
+		netdev_warn(netdev, "Registering only %d out of %d multicast addresses\n",
			    ENIC_MULTICAST_PERFECT_FILTERS, mc_count);
+
+		return -ENOSPC;
+	}
+
+	enic_dev_add_addr(enic, mc_addr);
+	enic->mc_count++;
+
+	return 0;
+}
+
+static int enic_mc_unsync(struct net_device *netdev, const u8 *mc_addr)
+{
+	struct enic *enic = netdev_priv(netdev);
+
+	enic_dev_del_addr(enic, mc_addr);
+	enic->mc_count--;
 
-	return net_stats;
+	return 0;
+}
+
+static int enic_uc_sync(struct net_device *netdev, const u8 *uc_addr)
+{
+	struct enic *enic = netdev_priv(netdev);
+
+	if (enic->uc_count == ENIC_UNICAST_PERFECT_FILTERS) {
+		unsigned int uc_count = netdev_uc_count(netdev);
+
+		netdev_warn(netdev, "Registering only %d out of %d unicast addresses\n",
			    ENIC_UNICAST_PERFECT_FILTERS, uc_count);
+
+		return -ENOSPC;
+	}
+
+	enic_dev_add_addr(enic, uc_addr);
+	enic->uc_count++;
+
+	return 0;
+}
+
+static int enic_uc_unsync(struct net_device *netdev, const u8 *uc_addr)
+{
+	struct enic *enic = netdev_priv(netdev);
+
+	enic_dev_del_addr(enic, uc_addr);
+	enic->uc_count--;
+
+	return 0;
 }
 
 void enic_reset_addr_lists(struct enic *enic)
 {
+	struct net_device *netdev = enic->netdev;
+
+	__dev_uc_unsync(netdev, NULL);
+	__dev_mc_unsync(netdev, NULL);
+
 	enic->mc_count = 0;
 	enic->uc_count = 0;
 	enic->flags = 0;
@@ -864,7 +1015,7 @@ static int enic_set_mac_addr(struct net_device *netdev, char *addr)
 			return -EADDRNOTAVAIL;
 	}
 
-	memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	eth_hw_addr_set(netdev, addr);
 
 	return 0;
 }
@@ -913,112 +1064,6 @@ static int enic_set_mac_address(struct net_device *netdev, void *p)
 	return enic_dev_add_station_addr(enic);
 }
 
-static void enic_update_multicast_addr_list(struct enic *enic)
-{
-	struct net_device *netdev = enic->netdev;
-	struct netdev_hw_addr *ha;
-	unsigned int mc_count = netdev_mc_count(netdev);
-	u8 mc_addr[ENIC_MULTICAST_PERFECT_FILTERS][ETH_ALEN];
-	unsigned int i, j;
-
-	if (mc_count > ENIC_MULTICAST_PERFECT_FILTERS) {
-		netdev_warn(netdev, "Registering only %d out of %d "
-			"multicast addresses\n",
-			ENIC_MULTICAST_PERFECT_FILTERS, mc_count);
-		mc_count = ENIC_MULTICAST_PERFECT_FILTERS;
-	}
-
-	/* Is there an easier way?  Trying to minimize to
-	 * calls to add/del multicast addrs.  We keep the
-	 * addrs from the last call in enic->mc_addr and
-	 * look for changes to add/del.
-	 */
-
-	i = 0;
-	netdev_for_each_mc_addr(ha, netdev) {
-		if (i == mc_count)
-			break;
-		memcpy(mc_addr[i++], ha->addr, ETH_ALEN);
-	}
-
-	for (i = 0; i < enic->mc_count; i++) {
-		for (j = 0; j < mc_count; j++)
-			if (ether_addr_equal(enic->mc_addr[i], mc_addr[j]))
-				break;
-		if (j == mc_count)
-			enic_dev_del_addr(enic, enic->mc_addr[i]);
-	}
-
-	for (i = 0; i < mc_count; i++) {
-		for (j = 0; j < enic->mc_count; j++)
-			if (ether_addr_equal(mc_addr[i], enic->mc_addr[j]))
-				break;
-		if (j == enic->mc_count)
-			enic_dev_add_addr(enic, mc_addr[i]);
-	}
-
-	/* Save the list to compare against next time
-	 */
-
-	for (i = 0; i < mc_count; i++)
-		memcpy(enic->mc_addr[i], mc_addr[i], ETH_ALEN);
-
-	enic->mc_count = mc_count;
-}
-
-static void enic_update_unicast_addr_list(struct enic *enic)
-{
-	struct net_device *netdev = enic->netdev;
-	struct netdev_hw_addr *ha;
-	unsigned int uc_count = netdev_uc_count(netdev);
-	u8 uc_addr[ENIC_UNICAST_PERFECT_FILTERS][ETH_ALEN];
-	unsigned int i, j;
-
-	if (uc_count > ENIC_UNICAST_PERFECT_FILTERS) {
-		netdev_warn(netdev, "Registering only %d out of %d "
-			"unicast addresses\n",
-			ENIC_UNICAST_PERFECT_FILTERS, uc_count);
-		uc_count = ENIC_UNICAST_PERFECT_FILTERS;
-	}
-
-	/* Is there an easier way?  Trying to minimize to
-	 * calls to add/del unicast addrs.  We keep the
-	 * addrs from the last call in enic->uc_addr and
-	 * look for changes to add/del.
-	 */
-
-	i = 0;
-	netdev_for_each_uc_addr(ha, netdev) {
-		if (i == uc_count)
-			break;
-		memcpy(uc_addr[i++], ha->addr, ETH_ALEN);
-	}
-
-	for (i = 0; i < enic->uc_count; i++) {
-		for (j = 0; j < uc_count; j++)
-			if (ether_addr_equal(enic->uc_addr[i], uc_addr[j]))
-				break;
-		if (j == uc_count)
-			enic_dev_del_addr(enic, enic->uc_addr[i]);
-	}
-
-	for (i = 0; i < uc_count; i++) {
-		for (j = 0; j < enic->uc_count; j++)
-			if (ether_addr_equal(uc_addr[i], enic->uc_addr[j]))
-				break;
-		if (j == enic->uc_count)
-			enic_dev_add_addr(enic, uc_addr[i]);
-	}
-
-	/* Save the list to compare against next time
-	 */
-
-	for (i = 0; i < uc_count; i++)
-		memcpy(enic->uc_addr[i], uc_addr[i], ETH_ALEN);
-
-	enic->uc_count = uc_count;
-}
-
 /* netif_tx_lock held, BHs disabled */
 static void enic_set_rx_mode(struct net_device *netdev)
 {
@@ -1041,17 +1086,17 @@ static void enic_set_rx_mode(struct net_device *netdev)
 	}
 
 	if (!promisc) {
-		enic_update_unicast_addr_list(enic);
+		__dev_uc_sync(netdev, enic_uc_sync, enic_uc_unsync);
 		if (!allmulti)
-			enic_update_multicast_addr_list(enic);
+			__dev_mc_sync(netdev, enic_mc_sync, enic_mc_unsync);
 	}
 }
 
 /* netif_tx_lock held, BHs disabled */
-static void enic_tx_timeout(struct net_device *netdev)
+static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 {
 	struct enic *enic = netdev_priv(netdev);
-	schedule_work(&enic->reset);
+	schedule_work(&enic->tx_hang_reset);
 }
 
 static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
@@ -1083,6 +1128,7 @@ static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 static int enic_set_vf_port(struct net_device *netdev, int vf,
 	struct nlattr *port[])
 {
+	static const u8 zero_addr[ETH_ALEN] = {};
 	struct enic *enic = netdev_priv(netdev);
 	struct enic_port_profile prev_pp;
 	struct enic_port_profile *pp;
@@ -1102,18 +1148,30 @@ static int enic_set_vf_port(struct net_device *netdev, int vf,
 	pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);
 
 	if (port[IFLA_PORT_PROFILE]) {
+		if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) {
+			memcpy(pp, &prev_pp, sizeof(*pp));
+			return -EINVAL;
+		}
 		pp->set |= ENIC_SET_NAME;
 		memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]),
 			PORT_PROFILE_MAX);
 	}
 
 	if (port[IFLA_PORT_INSTANCE_UUID]) {
+		if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) {
+			memcpy(pp, &prev_pp, sizeof(*pp));
+			return -EINVAL;
+		}
 		pp->set |= ENIC_SET_INSTANCE;
 		memcpy(pp->instance_uuid,
 			nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX);
 	}
 
 	if (port[IFLA_PORT_HOST_UUID]) {
+		if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) {
+			memcpy(pp, &prev_pp, sizeof(*pp));
+			return -EINVAL;
+		}
 		pp->set |= ENIC_SET_HOST;
 		memcpy(pp->host_uuid,
 			nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX);
@@ -1147,7 +1205,7 @@ static int enic_set_vf_port(struct net_device *netdev, int vf,
 		} else {
 			memset(pp, 0, sizeof(*pp));
 			if (vf == PORT_SELF_VF)
-				memset(netdev->dev_addr, 0, ETH_ALEN);
+				eth_hw_addr_set(netdev, zero_addr);
 		}
 	} else {
 		/* Set flag to indicate that the port assoc/disassoc
@@ -1157,14 +1215,14 @@ static int enic_set_vf_port(struct net_device *netdev, int vf,
 
 		/* If DISASSOCIATE, clean up all assigned/saved macaddresses */
 		if (pp->request == PORT_REQUEST_DISASSOCIATE) {
-			memset(pp->mac_addr, 0, ETH_ALEN);
+			eth_zero_addr(pp->mac_addr);
 			if (vf == PORT_SELF_VF)
-				memset(netdev->dev_addr, 0, ETH_ALEN);
+				eth_hw_addr_set(netdev, zero_addr);
 		}
 	}
 
 	if (vf == PORT_SELF_VF)
-		memset(pp->vf_mac, 0, ETH_ALEN);
+		eth_zero_addr(pp->vf_mac);
 
 	return err;
 }
@@ -1204,127 +1262,62 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
+static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
 {
-	struct enic *enic = vnic_dev_priv(rq->vdev);
-
-	if (!buf->os_buf)
-		return;
-
-	pci_unmap_single(enic->pdev, buf->dma_addr,
-		buf->len, PCI_DMA_FROMDEVICE);
-	dev_kfree_skb_any(buf->os_buf);
-}
-
-static int enic_rq_alloc_buf(struct vnic_rq *rq)
-{
-	struct enic *enic = vnic_dev_priv(rq->vdev);
-	struct net_device *netdev = enic->netdev;
-	struct sk_buff *skb;
-	unsigned int len = netdev->mtu + VLAN_ETH_HLEN;
-	unsigned int os_buf_index = 0;
-	dma_addr_t dma_addr;
-
-	skb = netdev_alloc_skb_ip_align(netdev, len);
-	if (!skb)
-		return -ENOMEM;
+	unsigned int intr = enic_msix_rq_intr(enic, rq->index);
+	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+	u32 timer = cq->tobe_rx_coal_timeval;
 
-	dma_addr = pci_map_single(enic->pdev, skb->data,
-		len, PCI_DMA_FROMDEVICE);
-
-	enic_queue_rq_desc(rq, skb, os_buf_index,
-		dma_addr, len);
-
-	return 0;
+	if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
+		vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
+		cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
+	}
 }
 
-static void enic_rq_indicate_buf(struct vnic_rq *rq,
-	struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
-	int skipped, void *opaque)
+static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
 {
-	struct enic *enic = vnic_dev_priv(rq->vdev);
-	struct net_device *netdev = enic->netdev;
-	struct sk_buff *skb;
-
-	u8 type, color, eop, sop, ingress_port, vlan_stripped;
-	u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
-	u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
-	u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc;
-	u8 packet_error;
-	u16 q_number, completed_index, bytes_written, vlan_tci, checksum;
-	u32 rss_hash;
+	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+	struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
+	int index;
+	u32 timer;
+	u32 range_start;
+	u32 traffic;
+	u64 delta;
+	ktime_t now = ktime_get();
 
-	if (skipped)
+	delta = ktime_us_delta(now, cq->prev_ts);
+	if (delta < ENIC_AIC_TS_BREAK)
 		return;
+	cq->prev_ts = now;
+
+	traffic = pkt_size_counter->large_pkt_bytes_cnt +
+		  pkt_size_counter->small_pkt_bytes_cnt;
+	/* The table takes Mbps
	 * traffic *= 8    => bits
	 * traffic *= (10^6 / delta)    => bps
	 * traffic /= 10^6     => Mbps
	 *
	 * Combining, traffic *= (8 / delta)
	 */
 
-	skb = buf->os_buf;
-	prefetch(skb->data - NET_IP_ALIGN);
-	pci_unmap_single(enic->pdev, buf->dma_addr,
-		buf->len, PCI_DMA_FROMDEVICE);
-
-	cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
-		&type, &color, &q_number, &completed_index,
-		&ingress_port, &fcoe, &eop, &sop, &rss_type,
-		&csum_not_calc, &rss_hash, &bytes_written,
-		&packet_error, &vlan_stripped, &vlan_tci, &checksum,
-		&fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error,
-		&fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp,
-		&ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment,
-		&fcs_ok);
-
-	if (packet_error) {
-
-		if (!fcs_ok) {
-			if (bytes_written > 0)
-				enic->rq_bad_fcs++;
-			else if (bytes_written == 0)
-				enic->rq_truncated_pkts++;
-		}
-
-		dev_kfree_skb_any(skb);
-
-		return;
-	}
-
-	if (eop && bytes_written > 0) {
-
-		/* Good receive
-		 */
-
-		skb_put(skb, bytes_written);
-		skb->protocol = eth_type_trans(skb, netdev);
-
-		if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc) {
-			skb->csum = htons(checksum);
-			skb->ip_summed = CHECKSUM_COMPLETE;
-		}
-
-		if (vlan_stripped)
-			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
-
-		if (netdev->features & NETIF_F_GRO)
-			napi_gro_receive(&enic->napi[q_number], skb);
-		else
-			netif_receive_skb(skb);
-	} else {
-
-		/* Buffer overflow
-		 */
-
-		dev_kfree_skb_any(skb);
-	}
-}
-
-static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
-	u8 type, u16 q_number, u16 completed_index, void *opaque)
-{
-	struct enic *enic = vnic_dev_priv(vdev);
+	traffic <<= 3;
+	traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;
 
-	vnic_rq_service(&enic->rq[q_number], cq_desc,
-		completed_index, VNIC_RQ_RETURN_DESC,
-		enic_rq_indicate_buf, opaque);
+	for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
+		if (traffic < mod_table[index].rx_rate)
+			break;
+	range_start = (pkt_size_counter->small_pkt_bytes_cnt >
		       pkt_size_counter->large_pkt_bytes_cnt << 1) ?
		      rx_coal->small_pkt_range_start :
		      rx_coal->large_pkt_range_start;
+	timer = range_start + ((rx_coal->range_end - range_start) *
			       mod_table[index].range_percent / 100);
+	/* Damping */
+	cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
 
-	return 0;
+	pkt_size_counter->large_pkt_bytes_cnt = 0;
+	pkt_size_counter->small_pkt_bytes_cnt = 0;
 }
 
 static int enic_poll(struct napi_struct *napi, int budget)
@@ -1333,20 +1326,16 @@ static int enic_poll(struct napi_struct *napi, int budget)
 	struct enic *enic = netdev_priv(netdev);
 	unsigned int cq_rq = enic_cq_rq(enic, 0);
 	unsigned int cq_wq = enic_cq_wq(enic, 0);
-	unsigned int intr = enic_legacy_io_intr();
+	unsigned int intr = ENIC_LEGACY_IO_INTR;
 	unsigned int rq_work_to_do = budget;
-	unsigned int wq_work_to_do = -1; /* no limit */
-	unsigned int work_done, rq_work_done, wq_work_done;
+	unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
+	unsigned int work_done, rq_work_done = 0, wq_work_done;
 	int err;
 
-	/* Service RQ (first) and WQ
-	 */
-
-	rq_work_done = vnic_cq_service(&enic->cq[cq_rq],
-		rq_work_to_do, enic_rq_service, NULL);
+	wq_work_done = enic_wq_cq_service(enic, cq_wq, wq_work_to_do);
 
-	wq_work_done = vnic_cq_service(&enic->cq[cq_wq],
-		wq_work_to_do, enic_wq_service, NULL);
+	if (budget > 0)
+		rq_work_done = enic_rq_cq_service(enic, cq_rq, rq_work_to_do);
 
 	/* Accumulate intr event credits for this polling
 	 * cycle.  An intr event is the completion of a
@@ -1361,7 +1350,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
 			0 /* don't unmask intr */,
 			0 /* don't reset intr timer */);
 
-	err = vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
+	err = vnic_rq_fill(&enic->rq[0].vrq, enic_rq_alloc_buf);
 
 	/* Buffer allocation failed. Stay in polling
 	 * mode so we can try to fill the ring again.
@@ -1369,21 +1358,98 @@ static int enic_poll(struct napi_struct *napi, int budget)
 	if (err)
 		rq_work_done = rq_work_to_do;
+	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+		/* Call the function which refreshes the intr coalescing timer
		 * value based on the traffic.
		 */
+		enic_calc_int_moderation(enic, &enic->rq[0].vrq);
 
-	if (rq_work_done < rq_work_to_do) {
+	if ((rq_work_done < budget) && napi_complete_done(napi, rq_work_done)) {
 
 		/* Some work done, but not enough to stay in polling,
 		 * exit polling
 		 */
 
-		napi_complete(napi);
+		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+			enic_set_int_moderation(enic, &enic->rq[0].vrq);
 		vnic_intr_unmask(&enic->intr[intr]);
+		enic->rq[0].stats.napi_complete++;
+	} else {
+		enic->rq[0].stats.napi_repoll++;
 	}
 
 	return rq_work_done;
 }
 
-static int enic_poll_msix(struct napi_struct *napi, int budget)
+#ifdef CONFIG_RFS_ACCEL
+static void enic_free_rx_cpu_rmap(struct enic *enic)
+{
+	free_irq_cpu_rmap(enic->netdev->rx_cpu_rmap);
+	enic->netdev->rx_cpu_rmap = NULL;
+}
+
+static void enic_set_rx_cpu_rmap(struct enic *enic)
+{
+	int i, res;
+
+	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) {
+		enic->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(enic->rq_count);
+		if (unlikely(!enic->netdev->rx_cpu_rmap))
+			return;
+		for (i = 0; i < enic->rq_count; i++) {
+			res = irq_cpu_rmap_add(enic->netdev->rx_cpu_rmap,
					       enic->msix_entry[i].vector);
+			if (unlikely(res)) {
+				enic_free_rx_cpu_rmap(enic);
+				return;
+			}
+		}
+	}
+}
+
+#else
+
+static void enic_free_rx_cpu_rmap(struct enic *enic)
+{
+}
+
+static void enic_set_rx_cpu_rmap(struct enic *enic)
+{
+}
+
+#endif /* CONFIG_RFS_ACCEL */
+
+static int enic_poll_msix_wq(struct napi_struct *napi, int budget)
+{
+	struct net_device *netdev = napi->dev;
+	struct enic *enic = netdev_priv(netdev);
+	unsigned int wq_index = (napi - &enic->napi[0]) - enic->rq_count;
+	struct vnic_wq *wq = &enic->wq[wq_index].vwq;
+	unsigned int cq;
+	unsigned int intr;
+	unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
+	unsigned int wq_work_done;
+	unsigned int wq_irq;
+
+	wq_irq = wq->index;
+	cq = enic_cq_wq(enic, wq_irq);
+	intr = enic_msix_wq_intr(enic, wq_irq);
+
+	wq_work_done = enic_wq_cq_service(enic, cq, wq_work_to_do);
+
+	vnic_intr_return_credits(&enic->intr[intr], wq_work_done,
				 0 /* don't unmask intr */,
				 1 /* reset intr timer */);
+	if (!wq_work_done) {
+		napi_complete(napi);
+		vnic_intr_unmask(&enic->intr[intr]);
+		return 0;
+	}
+
+	return budget;
+}
+
+static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
 {
 	struct net_device *netdev = napi->dev;
 	struct enic *enic = netdev_priv(netdev);
@@ -1391,14 +1457,14 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
 	unsigned int cq = enic_cq_rq(enic, rq);
 	unsigned int intr = enic_msix_rq_intr(enic, rq);
 	unsigned int work_to_do = budget;
-	unsigned int work_done;
+	unsigned int work_done = 0;
 	int err;
 
 	/* Service RQ
 	 */
 
-	work_done = vnic_cq_service(&enic->cq[cq],
-		work_to_do, enic_rq_service, NULL);
+	if (budget > 0)
+		work_done = enic_rq_cq_service(enic, cq, work_to_do);
 
 	/* Return intr event credits for this polling
 	 * cycle.  An intr event is the completion of a
@@ -1411,7 +1477,7 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
 			0 /* don't unmask intr */,
 			0 /* don't reset intr timer */);
 
-	err = vnic_rq_fill(&enic->rq[rq], enic_rq_alloc_buf);
+	err = vnic_rq_fill(&enic->rq[rq].vrq, enic_rq_alloc_buf);
 
 	/* Buffer allocation failed. Stay in polling mode
 	 * so we can try to fill the ring again.
@@ -1419,23 +1485,32 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
 	if (err)
 		work_done = work_to_do;
+	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+		/* Call the function which refreshes the intr coalescing timer
		 * value based on the traffic.
		 */
+		enic_calc_int_moderation(enic, &enic->rq[rq].vrq);
 
-	if (work_done < work_to_do) {
+	if ((work_done < budget) && napi_complete_done(napi, work_done)) {
 
 		/* Some work done, but not enough to stay in polling,
 		 * exit polling
 		 */
 
-		napi_complete(napi);
+		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+			enic_set_int_moderation(enic, &enic->rq[rq].vrq);
 		vnic_intr_unmask(&enic->intr[intr]);
+		enic->rq[rq].stats.napi_complete++;
+	} else {
+		enic->rq[rq].stats.napi_repoll++;
 	}
 
 	return work_done;
 }
 
-static void enic_notify_timer(unsigned long data)
+static void enic_notify_timer(struct timer_list *t)
 {
-	struct enic *enic = (struct enic *)data;
+	struct enic *enic = timer_container_of(enic, t, notify_timer);
 
 	enic_notify_check(enic);
 
@@ -1448,6 +1523,7 @@ static void enic_free_intr(struct enic *enic)
 	struct net_device *netdev = enic->netdev;
 	unsigned int i;
 
+	enic_free_rx_cpu_rmap(enic);
 	switch (vnic_dev_get_intr_mode(enic->vdev)) {
 	case VNIC_DEV_INTR_MODE_INTX:
 		free_irq(enic->pdev->irq, netdev);
@@ -1456,7 +1532,7 @@ static void enic_free_intr(struct enic *enic)
 		free_irq(enic->pdev->irq, enic);
 		break;
 	case VNIC_DEV_INTR_MODE_MSIX:
-		for (i = 0; i < ARRAY_SIZE(enic->msix); i++)
+		for (i = 0; i < enic->intr_count; i++)
 			if (enic->msix[i].requested)
 				free_irq(enic->msix_entry[i].vector,
 					enic->msix[i].devid);
@@ -1472,6 +1548,7 @@ static int enic_request_intr(struct enic *enic)
 	unsigned int i, intr;
 	int err = 0;
 
+	enic_set_rx_cpu_rmap(enic);
 	switch (vnic_dev_get_intr_mode(enic->vdev)) {
 
 	case VNIC_DEV_INTR_MODE_INTX:
@@ -1492,35 +1569,37 @@ static int enic_request_intr(struct enic *enic)
 			intr = enic_msix_rq_intr(enic, i);
 			snprintf(enic->msix[intr].devname,
 				sizeof(enic->msix[intr].devname),
-				"%.11s-rx-%d", netdev->name, i);
-			enic->msix[intr].isr = enic_isr_msix_rq;
+				"%s-rx-%u", netdev->name, i);
+			enic->msix[intr].isr = enic_isr_msix;
 			enic->msix[intr].devid = &enic->napi[i];
 		}
 
 		for (i = 0; i < enic->wq_count; i++) {
+			int wq = enic_cq_wq(enic, i);
+
 			intr = enic_msix_wq_intr(enic, i);
 			snprintf(enic->msix[intr].devname,
 				sizeof(enic->msix[intr].devname),
-				"%.11s-tx-%d", netdev->name, i);
-			enic->msix[intr].isr = enic_isr_msix_wq;
-			enic->msix[intr].devid = enic;
+				"%s-tx-%u", netdev->name, i);
+			enic->msix[intr].isr = enic_isr_msix;
+			enic->msix[intr].devid = &enic->napi[wq];
 		}
 
 		intr = enic_msix_err_intr(enic);
 		snprintf(enic->msix[intr].devname,
 			sizeof(enic->msix[intr].devname),
-			"%.11s-err", netdev->name);
+			"%s-err", netdev->name);
 		enic->msix[intr].isr = enic_isr_msix_err;
 		enic->msix[intr].devid = enic;
 
 		intr = enic_msix_notify_intr(enic);
 		snprintf(enic->msix[intr].devname,
			sizeof(enic->msix[intr].devname),
-			"%.11s-notify", netdev->name);
+			"%s-notify", netdev->name);
 		enic->msix[intr].isr = enic_isr_msix_notify;
 		enic->msix[intr].devid = enic;
 
-		for (i = 0; i < ARRAY_SIZE(enic->msix); i++)
+		for (i = 0; i < enic->intr_count; i++)
 			enic->msix[i].requested = 0;
 
 		for (i = 0; i < enic->intr_count; i++) {
@@ -1566,11 +1645,10 @@ static int enic_dev_notify_set(struct enic *enic)
 {
 	int err;
 
-	spin_lock(&enic->devcmd_lock);
+	spin_lock_bh(&enic->devcmd_lock);
 	switch (vnic_dev_get_intr_mode(enic->vdev)) {
 	case VNIC_DEV_INTR_MODE_INTX:
-		err = vnic_dev_notify_set(enic->vdev,
-			enic_legacy_notify_intr());
+		err = vnic_dev_notify_set(enic->vdev, ENIC_LEGACY_NOTIFY_INTR);
 		break;
 	case VNIC_DEV_INTR_MODE_MSIX:
 		err = vnic_dev_notify_set(enic->vdev,
@@ -1580,7 +1658,7 @@ static int enic_dev_notify_set(struct enic *enic)
 		err = vnic_dev_notify_set(enic->vdev, -1 /* no intr */);
 		break;
 	}
-	spin_unlock(&enic->devcmd_lock);
+	spin_unlock_bh(&enic->devcmd_lock);
 
 	return err;
 }
@@ -1602,13 +1680,26 @@ static int enic_open(struct net_device *netdev)
 {
 	struct enic *enic = netdev_priv(netdev);
 	unsigned int i;
-	int err;
+	int err, ret;
+	unsigned int max_pkt_len = netdev->mtu + VLAN_ETH_HLEN;
+	struct page_pool_params pp_params = {
+		.order = get_order(max_pkt_len),
+		.pool_size = enic->config.rq_desc_count,
+		.nid = dev_to_node(&enic->pdev->dev),
+		.dev = &enic->pdev->dev,
+		.dma_dir = DMA_FROM_DEVICE,
+		.max_len = (max_pkt_len > PAGE_SIZE) ? max_pkt_len : PAGE_SIZE,
+		.netdev = netdev,
+		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+	};
 
 	err = enic_request_intr(enic);
 	if (err) {
 		netdev_err(netdev, "Unable to request irq.\n");
 		return err;
 	}
+	enic_init_affinity_hint(enic);
+	enic_set_affinity_hint(enic);
 
 	err = enic_dev_notify_set(enic);
 	if (err) {
@@ -1618,42 +1709,65 @@ static int enic_open(struct net_device *netdev)
 	}
 
 	for (i = 0; i < enic->rq_count; i++) {
-		vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
+		/* create a page pool for each RQ */
+		pp_params.napi = &enic->napi[i];
+		pp_params.queue_idx = i;
+		enic->rq[i].pool = page_pool_create(&pp_params);
+		if (IS_ERR(enic->rq[i].pool)) {
+			err = PTR_ERR(enic->rq[i].pool);
+			enic->rq[i].pool = NULL;
+			goto err_out_free_rq;
+		}
+
+		/* enable rq before updating rq desc */
+		vnic_rq_enable(&enic->rq[i].vrq);
+		vnic_rq_fill(&enic->rq[i].vrq, enic_rq_alloc_buf);
 		/* Need at least one buffer on ring to get going */
-		if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
+		if (vnic_rq_desc_used(&enic->rq[i].vrq) == 0) {
 			netdev_err(netdev, "Unable to alloc receive buffers\n");
 			err = -ENOMEM;
-			goto err_out_notify_unset;
+			goto err_out_free_rq;
 		}
 	}
 
 	for (i = 0; i < enic->wq_count; i++)
-		vnic_wq_enable(&enic->wq[i]);
-	for (i = 0; i < enic->rq_count; i++)
-		vnic_rq_enable(&enic->rq[i]);
+		vnic_wq_enable(&enic->wq[i].vwq);
 
 	if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
 		enic_dev_add_station_addr(enic);
 
 	enic_set_rx_mode(netdev);
 
-	netif_wake_queue(netdev);
+	netif_tx_wake_all_queues(netdev);
 
 	for (i = 0; i < enic->rq_count; i++)
 		napi_enable(&enic->napi[i]);
 
+	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
+		for (i = 0; i < enic->wq_count; i++)
+			napi_enable(&enic->napi[enic_cq_wq(enic, i)]);
 	enic_dev_enable(enic);
 
 	for (i = 0; i < enic->intr_count; i++)
 		vnic_intr_unmask(&enic->intr[i]);
 
 	enic_notify_timer_start(enic);
+	enic_rfs_timer_start(enic);
 
 	return 0;
 
-err_out_notify_unset:
+err_out_free_rq:
+	for (i = 0; i < enic->rq_count; i++) {
+		ret = vnic_rq_disable(&enic->rq[i].vrq);
+		if (!ret) {
+			vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf);
+			page_pool_destroy(enic->rq[i].pool);
+			enic->rq[i].pool = NULL;
+		}
+	}
 	enic_dev_notify_unset(enic);
 err_out_free_intr:
+	enic_unset_affinity_hint(enic);
 	enic_free_intr(enic);
 
 	return err;
@@ -1673,7 +1787,8 @@ static int enic_stop(struct net_device *netdev)
 
 	enic_synchronize_irqs(enic);
 
-	del_timer_sync(&enic->notify_timer);
+	timer_delete_sync(&enic->notify_timer);
+	enic_rfs_flw_tbl_free(enic);
 
 	enic_dev_disable(enic);
 
@@ -1681,29 +1796,36 @@ static int enic_stop(struct net_device *netdev)
 		napi_disable(&enic->napi[i]);
 
 	netif_carrier_off(netdev);
+	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
+		for (i = 0; i < enic->wq_count; i++)
+			napi_disable(&enic->napi[enic_cq_wq(enic, i)]);
 	netif_tx_disable(netdev);
+
 	if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
 		enic_dev_del_station_addr(enic);
 
 	for (i = 0; i < enic->wq_count; i++) {
-		err = vnic_wq_disable(&enic->wq[i]);
+		err = vnic_wq_disable(&enic->wq[i].vwq);
 		if (err)
 			return err;
 	}
 	for (i = 0; i < enic->rq_count; i++) {
-		err = vnic_rq_disable(&enic->rq[i]);
+		err = vnic_rq_disable(&enic->rq[i].vrq);
 		if (err)
 			return err;
 	}
 
 	enic_dev_notify_unset(enic);
+	enic_unset_affinity_hint(enic);
 	enic_free_intr(enic);
 
 	for (i = 0; i < enic->wq_count; i++)
-		vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
-	for (i = 0; i < enic->rq_count; i++)
-		vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+		vnic_wq_clean(&enic->wq[i].vwq, enic_free_wq_buf);
+	for (i = 0; i < enic->rq_count; i++) {
+		vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf);
+		page_pool_destroy(enic->rq[i].pool);
+		enic->rq[i].pool = NULL;
+	}
 	for (i = 0; i < enic->cq_count; i++)
 		vnic_cq_clean(&enic->cq[i]);
 	for (i = 0; i < enic->intr_count; i++)
@@ -1712,31 +1834,42 @@ static int enic_stop(struct net_device *netdev)
 	return 0;
 }
 
+static int _enic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	bool running = netif_running(netdev);
+	int err = 0;
+
+	ASSERT_RTNL();
+	if (running) {
+		err = enic_stop(netdev);
+		if (err)
+			return err;
+	}
+
+	WRITE_ONCE(netdev->mtu, new_mtu);
+
+	if (running) {
+		err = enic_open(netdev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int enic_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct enic *enic = netdev_priv(netdev);
-	int running = netif_running(netdev);
-
-	if (new_mtu < ENIC_MIN_MTU || new_mtu > ENIC_MAX_MTU)
-		return -EINVAL;
 
 	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
 		return -EOPNOTSUPP;
 
-	if (running)
-		enic_stop(netdev);
-
-	netdev->mtu = new_mtu;
-
-	if (netdev->mtu > enic->port_mtu)
+	if (new_mtu > enic->port_mtu)
 		netdev_warn(netdev,
-			"interface MTU (%d) set higher than port MTU (%d)\n",
-			netdev->mtu, enic->port_mtu);
-
-	if (running)
-		enic_open(netdev);
+			    "interface MTU (%d) set higher than port MTU (%d)\n",
+			    new_mtu, enic->port_mtu);
 
-	return 0;
+	return _enic_change_mtu(netdev, new_mtu);
 }
 
 static void enic_change_mtu_work(struct work_struct *work)
@@ -1744,47 +1877,9 @@ static void enic_change_mtu_work(struct work_struct *work)
 	struct enic *enic = container_of(work, struct enic, change_mtu_work);
 	struct net_device *netdev = enic->netdev;
 	int new_mtu = vnic_dev_mtu(enic->vdev);
-	int err;
-	unsigned int i;
-
-	new_mtu = max_t(int, ENIC_MIN_MTU, min_t(int, ENIC_MAX_MTU, new_mtu));
 
 	rtnl_lock();
-
-	/* Stop RQ */
-	del_timer_sync(&enic->notify_timer);
-
-	for (i = 0; i < enic->rq_count; i++)
-		napi_disable(&enic->napi[i]);
-
-	vnic_intr_mask(&enic->intr[0]);
-	enic_synchronize_irqs(enic);
-	err = vnic_rq_disable(&enic->rq[0]);
-	if (err) {
-		rtnl_unlock();
-		netdev_err(netdev, "Unable to disable RQ.\n");
-		return;
-	}
-	vnic_rq_clean(&enic->rq[0], enic_free_rq_buf);
-	vnic_cq_clean(&enic->cq[0]);
-	vnic_intr_clean(&enic->intr[0]);
-
-	/* Fill RQ with new_mtu-sized buffers */
-	netdev->mtu = new_mtu;
-	vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
-	/* Need at least one buffer on ring to get going */
-	if (vnic_rq_desc_used(&enic->rq[0]) == 0) {
-		rtnl_unlock();
-		netdev_err(netdev, "Unable to alloc receive buffers.\n");
-		return;
-	}
-
-	/* Start RQ */
-	vnic_rq_enable(&enic->rq[0]);
-	napi_enable(&enic->napi[0]);
-	vnic_intr_unmask(&enic->intr[0]);
-	enic_notify_timer_start(enic);
-
+	(void)_enic_change_mtu(netdev, new_mtu);
 	rtnl_unlock();
 
 	netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu);
@@ -1801,13 +1896,14 @@ static void enic_poll_controller(struct net_device *netdev)
 	case VNIC_DEV_INTR_MODE_MSIX:
 		for (i = 0; i < enic->rq_count; i++) {
 			intr = enic_msix_rq_intr(enic, i);
-			enic_isr_msix_rq(enic->msix_entry[intr].vector,
-				&enic->napi[i]);
+			enic_isr_msix(enic->msix_entry[intr].vector,
				      &enic->napi[i]);
 		}
 
 		for (i = 0; i < enic->wq_count; i++) {
 			intr = enic_msix_wq_intr(enic, i);
-			enic_isr_msix_wq(enic->msix_entry[intr].vector, enic);
+			enic_isr_msix(enic->msix_entry[intr].vector,
				      &enic->napi[enic_cq_wq(enic, i)]);
 		}
 
 		break;
@@ -1832,8 +1928,6 @@ static int enic_dev_wait(struct vnic_dev *vdev,
 	int done;
 	int err;
 
-	BUG_ON(in_interrupt());
-
 	err = start(vdev, arg);
 	if (err)
 		return err;
@@ -1861,9 +1955,10 @@ static int enic_dev_wait(struct vnic_dev *vdev,
 static int enic_dev_open(struct enic *enic)
 {
 	int err;
+	u32 flags = CMD_OPENF_IG_DESCCACHE;
 
 	err = enic_dev_wait(enic->vdev, vnic_dev_open,
-		vnic_dev_open_done, 0);
+		vnic_dev_open_done, flags);
 	if (err)
 		dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
 			err);
@@ -1871,6 +1966,19 @@ static int enic_dev_open(struct enic *enic)
 	return err;
 }
 
+static int enic_dev_soft_reset(struct enic *enic)
+{
+	int err;
+
+	err = enic_dev_wait(enic->vdev, vnic_dev_soft_reset,
			    vnic_dev_soft_reset_done, 0);
+	if (err)
+		netdev_err(enic->netdev, "vNIC soft reset failed, err %d\n",
			   err);
+
+	return err;
+}
+
 static int enic_dev_hang_reset(struct enic *enic)
 {
 	int err;
@@ -1884,37 +1992,42 @@ static int enic_dev_hang_reset(struct enic *enic)
 	return err;
 }
 
-static int enic_set_rsskey(struct enic *enic)
+int __enic_set_rsskey(struct enic *enic)
 {
+	union vnic_rss_key *rss_key_buf_va;
 	dma_addr_t rss_key_buf_pa;
-	union vnic_rss_key *rss_key_buf_va = NULL;
-	union vnic_rss_key rss_key = {
-		.key[0].b = {85, 67, 83, 97, 119, 101, 115, 111, 109, 101},
-		.key[1].b = {80, 65, 76, 79, 117, 110, 105, 113, 117, 101},
-		.key[2].b = {76, 73, 78, 85, 88, 114, 111, 99, 107, 115},
-		.key[3].b = {69, 78, 73, 67, 105, 115, 99, 111, 111, 108},
-	};
-	int err;
+	int i, kidx, bidx, err;
 
-	rss_key_buf_va = pci_alloc_consistent(enic->pdev,
-		sizeof(union vnic_rss_key), &rss_key_buf_pa);
+	rss_key_buf_va = dma_alloc_coherent(&enic->pdev->dev,
					    sizeof(union vnic_rss_key),
					    &rss_key_buf_pa, GFP_ATOMIC);
 	if (!rss_key_buf_va)
 		return -ENOMEM;
 
-	memcpy(rss_key_buf_va, &rss_key, sizeof(union vnic_rss_key));
-
-	spin_lock(&enic->devcmd_lock);
+	for (i = 0; i < ENIC_RSS_LEN; i++) {
+		kidx = i / ENIC_RSS_BYTES_PER_KEY;
+		bidx = i % ENIC_RSS_BYTES_PER_KEY;
+		rss_key_buf_va->key[kidx].b[bidx] = enic->rss_key[i];
+	}
+	spin_lock_bh(&enic->devcmd_lock);
 	err = enic_set_rss_key(enic,
 		rss_key_buf_pa,
 		sizeof(union vnic_rss_key));
-	spin_unlock(&enic->devcmd_lock);
+	spin_unlock_bh(&enic->devcmd_lock);
 
-	pci_free_consistent(enic->pdev, sizeof(union vnic_rss_key),
-		rss_key_buf_va, rss_key_buf_pa);
+	dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_key),
			  rss_key_buf_va, rss_key_buf_pa);
 
 	return err;
 }
 
+static int enic_set_rsskey(struct enic *enic)
+{
+	netdev_rss_key_fill(enic->rss_key, ENIC_RSS_LEN);
+
+	return __enic_set_rsskey(enic);
+}
+
 static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
 {
 	dma_addr_t rss_cpu_buf_pa;
@@ -1922,22 +2035,23 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
 	unsigned int i;
 	int err;
 
-	rss_cpu_buf_va = pci_alloc_consistent(enic->pdev,
-		sizeof(union vnic_rss_cpu), &rss_cpu_buf_pa);
+	rss_cpu_buf_va = dma_alloc_coherent(&enic->pdev->dev,
					    sizeof(union vnic_rss_cpu),
					    &rss_cpu_buf_pa, GFP_ATOMIC);
 	if (!rss_cpu_buf_va)
 		return -ENOMEM;
 
 	for (i = 0; i < (1 << rss_hash_bits); i++)
 		(*rss_cpu_buf_va).cpu[i/4].b[i%4] = i %
enic->rq_count; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = enic_set_rss_cpu(enic, rss_cpu_buf_pa, sizeof(union vnic_rss_cpu)); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); - pci_free_consistent(enic->pdev, sizeof(union vnic_rss_cpu), - rss_cpu_buf_va, rss_cpu_buf_pa); + dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_cpu), + rss_cpu_buf_va, rss_cpu_buf_pa); return err; } @@ -1952,13 +2066,13 @@ static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu, /* Enable VLAN tag stripping. */ - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = enic_set_nic_cfg(enic, rss_default_cpu, rss_hash_type, rss_hash_bits, rss_base_cpu, rss_enable, tso_ipid_split_en, ig_vlan_strip_en); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -1967,14 +2081,24 @@ static int enic_set_rss_nic_cfg(struct enic *enic) { struct device *dev = enic_get_dev(enic); const u8 rss_default_cpu = 0; - const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 | - NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 | - NIC_CFG_RSS_HASH_TYPE_IPV6 | - NIC_CFG_RSS_HASH_TYPE_TCP_IPV6; const u8 rss_hash_bits = 7; const u8 rss_base_cpu = 0; + u8 rss_hash_type; + int res; u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1); + spin_lock_bh(&enic->devcmd_lock); + res = vnic_dev_capable_rss_hash_type(enic->vdev, &rss_hash_type); + spin_unlock_bh(&enic->devcmd_lock); + if (res) { + /* defaults for old adapters + */ + rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 | + NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 | + NIC_CFG_RSS_HASH_TYPE_IPV6 | + NIC_CFG_RSS_HASH_TYPE_TCP_IPV6; + } + if (rss_enable) { if (!enic_set_rsskey(enic)) { if (enic_set_rsscpu(enic, rss_hash_bits)) { @@ -1992,6 +2116,13 @@ static int enic_set_rss_nic_cfg(struct enic *enic) rss_hash_bits, rss_base_cpu, rss_enable); } +static void enic_set_api_busy(struct enic *enic, bool busy) +{ + spin_lock(&enic->enic_api_lock); + enic->enic_api_busy = busy; + spin_unlock(&enic->enic_api_lock); +} + static void enic_reset(struct work_struct *work) { struct enic *enic = container_of(work, struct enic, reset); @@ -2001,6 +2132,35 @@ static void enic_reset(struct work_struct *work) rtnl_lock(); + /* Stop any activity from infiniband */ + enic_set_api_busy(enic, true); + + enic_stop(enic->netdev); + enic_dev_soft_reset(enic); + enic_reset_addr_lists(enic); + enic_init_vnic_resources(enic); + enic_set_rss_nic_cfg(enic); + enic_dev_set_ig_vlan_rewrite_mode(enic); + enic_ext_cq(enic); + enic_open(enic->netdev); + + /* Allow infiniband to fiddle with the device again */ + enic_set_api_busy(enic, false); + + call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); + + rtnl_unlock(); +} + +static void enic_tx_hang_reset(struct work_struct *work) +{ + struct enic *enic = container_of(work, struct enic, tx_hang_reset); + + rtnl_lock(); + + /* Stop any activity from infiniband */ + enic_set_api_busy(enic, true); + enic_dev_hang_notify(enic); enic_stop(enic->netdev); enic_dev_hang_reset(enic); @@ -2008,117 +2168,69 @@ static void enic_reset(struct work_struct *work) enic_init_vnic_resources(enic); enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); + enic_ext_cq(enic); enic_open(enic->netdev); + /* Allow infiniband to fiddle with the device again */ + enic_set_api_busy(enic, false); + + call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); + rtnl_unlock(); } static int enic_set_intr_mode(struct enic *enic) { - unsigned int n = min_t(unsigned int, enic->rq_count, ENIC_RQ_MAX); - 
unsigned int m = min_t(unsigned int, enic->wq_count, ENIC_WQ_MAX); unsigned int i; + int num_intr; /* Set interrupt mode (INTx, MSI, MSI-X) depending * on system capabilities. * * Try MSI-X first - * - * We need n RQs, m WQs, n+m CQs, and n+m+2 INTRs - * (the second to last INTR is used for WQ/RQ errors) - * (the last INTR is used for notifications) */ - BUG_ON(ARRAY_SIZE(enic->msix_entry) < n + m + 2); - for (i = 0; i < n + m + 2; i++) - enic->msix_entry[i].entry = i; - - /* Use multiple RQs if RSS is enabled - */ - - if (ENIC_SETTING(enic, RSS) && - enic->config.intr_mode < 1 && - enic->rq_count >= n && - enic->wq_count >= m && - enic->cq_count >= n + m && - enic->intr_count >= n + m + 2) { - - if (!pci_enable_msix(enic->pdev, enic->msix_entry, n + m + 2)) { - - enic->rq_count = n; - enic->wq_count = m; - enic->cq_count = n + m; - enic->intr_count = n + m + 2; - - vnic_dev_set_intr_mode(enic->vdev, - VNIC_DEV_INTR_MODE_MSIX); - - return 0; - } - } - if (enic->config.intr_mode < 1 && - enic->rq_count >= 1 && - enic->wq_count >= m && - enic->cq_count >= 1 + m && - enic->intr_count >= 1 + m + 2) { - if (!pci_enable_msix(enic->pdev, enic->msix_entry, 1 + m + 2)) { - - enic->rq_count = 1; - enic->wq_count = m; - enic->cq_count = 1 + m; - enic->intr_count = 1 + m + 2; - + enic->intr_avail >= ENIC_MSIX_MIN_INTR) { + for (i = 0; i < enic->intr_avail; i++) + enic->msix_entry[i].entry = i; + + num_intr = pci_enable_msix_range(enic->pdev, enic->msix_entry, + ENIC_MSIX_MIN_INTR, + enic->intr_avail); + if (num_intr > 0) { vnic_dev_set_intr_mode(enic->vdev, - VNIC_DEV_INTR_MODE_MSIX); - + VNIC_DEV_INTR_MODE_MSIX); + enic->intr_avail = num_intr; return 0; } } /* Next try MSI * - * We need 1 RQ, 1 WQ, 2 CQs, and 1 INTR + * We need 1 INTR */ if (enic->config.intr_mode < 2 && - enic->rq_count >= 1 && - enic->wq_count >= 1 && - enic->cq_count >= 2 && - enic->intr_count >= 1 && + enic->intr_avail >= 1 && !pci_enable_msi(enic->pdev)) { - - enic->rq_count = 1; - enic->wq_count = 1; - enic->cq_count = 2; - enic->intr_count = 1; - + enic->intr_avail = 1; vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSI); - return 0; } /* Next try INTx * - * We need 1 RQ, 1 WQ, 2 CQs, and 3 INTRs + * We need 3 INTRs * (the first INTR is used for WQ/RQ) * (the second INTR is used for WQ/RQ errors) * (the last INTR is used for notifications) */ if (enic->config.intr_mode < 3 && - enic->rq_count >= 1 && - enic->wq_count >= 1 && - enic->cq_count >= 2 && - enic->intr_count >= 3) { - - enic->rq_count = 1; - enic->wq_count = 1; - enic->cq_count = 2; - enic->intr_count = 3; - + enic->intr_avail >= 3) { + enic->intr_avail = 3; vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_INTX); - return 0; } @@ -2143,6 +2255,127 @@ static void enic_clear_intr_mode(struct enic *enic) vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); } +static int enic_adjust_resources(struct enic *enic) +{ + unsigned int max_queues; + unsigned int rq_default; + unsigned int rq_avail; + unsigned int wq_avail; + + if (enic->rq_avail < 1 || enic->wq_avail < 1 || enic->cq_avail < 2) { + dev_err(enic_get_dev(enic), + "Not enough resources available rq: %d wq: %d cq: %d\n", + enic->rq_avail, enic->wq_avail, + enic->cq_avail); + return -ENOSPC; + } + + if (is_kdump_kernel()) { + dev_info(enic_get_dev(enic), "Running from within kdump kernel. 
Using minimal resources\n"); + enic->rq_avail = 1; + enic->wq_avail = 1; + enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS; + enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS; + enic->config.mtu = min_t(u16, 1500, enic->config.mtu); + } + + /* if RSS isn't set, then we can only use one RQ */ + if (!ENIC_SETTING(enic, RSS)) + enic->rq_avail = 1; + + switch (vnic_dev_get_intr_mode(enic->vdev)) { + case VNIC_DEV_INTR_MODE_INTX: + case VNIC_DEV_INTR_MODE_MSI: + enic->rq_count = 1; + enic->wq_count = 1; + enic->cq_count = 2; + enic->intr_count = enic->intr_avail; + break; + case VNIC_DEV_INTR_MODE_MSIX: + /* Adjust the number of wqs/rqs/cqs/interrupts that will be + * used based on which resource is the most constrained + */ + wq_avail = min(enic->wq_avail, ENIC_WQ_MAX); + rq_default = max(netif_get_num_default_rss_queues(), + ENIC_RQ_MIN_DEFAULT); + rq_avail = min3(enic->rq_avail, ENIC_RQ_MAX, rq_default); + max_queues = min(enic->cq_avail, + enic->intr_avail - ENIC_MSIX_RESERVED_INTR); + if (wq_avail + rq_avail <= max_queues) { + enic->rq_count = rq_avail; + enic->wq_count = wq_avail; + } else { + /* recalculate wq/rq count */ + if (rq_avail < wq_avail) { + enic->rq_count = min(rq_avail, max_queues / 2); + enic->wq_count = max_queues - enic->rq_count; + } else { + enic->wq_count = min(wq_avail, max_queues / 2); + enic->rq_count = max_queues - enic->wq_count; + } + } + enic->cq_count = enic->rq_count + enic->wq_count; + enic->intr_count = enic->cq_count + ENIC_MSIX_RESERVED_INTR; + + break; + default: + dev_err(enic_get_dev(enic), "Unknown interrupt mode\n"); + return -EINVAL; + } + + return 0; +} + +static void enic_get_queue_stats_rx(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rxs) +{ + struct enic *enic = netdev_priv(dev); + struct enic_rq_stats *rqstats = &enic->rq[idx].stats; + + rxs->bytes = rqstats->bytes; + rxs->packets = rqstats->packets; + rxs->hw_drops = rqstats->bad_fcs + rqstats->pkt_truncated; + rxs->hw_drop_overruns = rqstats->pkt_truncated; + rxs->csum_unnecessary = rqstats->csum_unnecessary + + rqstats->csum_unnecessary_encap; + rxs->alloc_fail = rqstats->pp_alloc_fail; +} + +static void enic_get_queue_stats_tx(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *txs) +{ + struct enic *enic = netdev_priv(dev); + struct enic_wq_stats *wqstats = &enic->wq[idx].stats; + + txs->bytes = wqstats->bytes; + txs->packets = wqstats->packets; + txs->csum_none = wqstats->csum_none; + txs->needs_csum = wqstats->csum_partial + wqstats->encap_csum + + wqstats->tso; + txs->hw_gso_packets = wqstats->tso; + txs->stop = wqstats->stopped; + txs->wake = wqstats->wake; +} + +static void enic_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rxs, + struct netdev_queue_stats_tx *txs) +{ + rxs->bytes = 0; + rxs->packets = 0; + rxs->hw_drops = 0; + rxs->hw_drop_overruns = 0; + rxs->csum_unnecessary = 0; + rxs->alloc_fail = 0; + txs->bytes = 0; + txs->packets = 0; + txs->csum_none = 0; + txs->needs_csum = 0; + txs->hw_gso_packets = 0; + txs->stop = 0; + txs->wake = 0; +} + static const struct net_device_ops enic_netdev_dynamic_ops = { .ndo_open = enic_open, .ndo_stop = enic_stop, @@ -2161,6 +2394,10 @@ static const struct net_device_ops enic_netdev_dynamic_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = enic_poll_controller, #endif +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = enic_rx_flow_steer, +#endif + .ndo_features_check = enic_features_check, }; static const struct net_device_ops enic_netdev_ops = { @@ -2181,17 +2418,101 @@ 
static const struct net_device_ops enic_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = enic_poll_controller, #endif +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = enic_rx_flow_steer, +#endif + .ndo_features_check = enic_features_check, +}; + +static const struct netdev_stat_ops enic_netdev_stat_ops = { + .get_queue_stats_rx = enic_get_queue_stats_rx, + .get_queue_stats_tx = enic_get_queue_stats_tx, + .get_base_stats = enic_get_base_stats, }; +static void enic_free_enic_resources(struct enic *enic) +{ + kfree(enic->wq); + enic->wq = NULL; + + kfree(enic->rq); + enic->rq = NULL; + + kfree(enic->cq); + enic->cq = NULL; + + kfree(enic->napi); + enic->napi = NULL; + + kfree(enic->msix_entry); + enic->msix_entry = NULL; + + kfree(enic->msix); + enic->msix = NULL; + + kfree(enic->intr); + enic->intr = NULL; +} + +static int enic_alloc_enic_resources(struct enic *enic) +{ + enic->wq = kcalloc(enic->wq_avail, sizeof(struct enic_wq), GFP_KERNEL); + if (!enic->wq) + goto free_queues; + + enic->rq = kcalloc(enic->rq_avail, sizeof(struct enic_rq), GFP_KERNEL); + if (!enic->rq) + goto free_queues; + + enic->cq = kcalloc(enic->cq_avail, sizeof(struct vnic_cq), GFP_KERNEL); + if (!enic->cq) + goto free_queues; + + enic->napi = kcalloc(enic->wq_avail + enic->rq_avail, + sizeof(struct napi_struct), GFP_KERNEL); + if (!enic->napi) + goto free_queues; + + enic->msix_entry = kcalloc(enic->intr_avail, sizeof(struct msix_entry), + GFP_KERNEL); + if (!enic->msix_entry) + goto free_queues; + + enic->msix = kcalloc(enic->intr_avail, sizeof(struct enic_msix_entry), + GFP_KERNEL); + if (!enic->msix) + goto free_queues; + + enic->intr = kcalloc(enic->intr_avail, sizeof(struct vnic_intr), + GFP_KERNEL); + if (!enic->intr) + goto free_queues; + + return 0; + +free_queues: + enic_free_enic_resources(enic); + return -ENOMEM; +} + static void enic_dev_deinit(struct enic *enic) { unsigned int i; for (i = 0; i < enic->rq_count; i++) - netif_napi_del(&enic->napi[i]); + __netif_napi_del(&enic->napi[i]); + + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) + for (i = 0; i < enic->wq_count; i++) + __netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); + + /* observe RCU grace period after __netif_napi_del() calls */ + synchronize_net(); enic_free_vnic_resources(enic); enic_clear_intr_mode(enic); + enic_free_affinity_hint(enic); + enic_free_enic_resources(enic); } static int enic_dev_init(struct enic *enic) @@ -2223,15 +2544,28 @@ static int enic_dev_init(struct enic *enic) enic_get_res_counts(enic); - /* Set interrupt mode based on resource counts and system - * capabilities - */ + enic_ext_cq(enic); + + err = enic_alloc_enic_resources(enic); + if (err) { + dev_err(dev, "Failed to allocate enic resources\n"); + return err; + } + + /* Set interrupt mode based on system capabilities */ err = enic_set_intr_mode(enic); if (err) { dev_err(dev, "Failed to set intr mode based on resource " "counts and system capabilities, aborting\n"); - return err; + goto err_out_free_vnic_resources; + } + + /* Adjust resource counts based on most constrained resources */ + err = enic_adjust_resources(enic); + if (err) { + dev_err(dev, "Failed to adjust resources\n"); + goto err_out_free_vnic_resources; } /* Allocate and configure vNIC resources @@ -2253,20 +2587,27 @@ static int enic_dev_init(struct enic *enic) switch (vnic_dev_get_intr_mode(enic->vdev)) { default: - netif_napi_add(netdev, &enic->napi[0], enic_poll, 64); + netif_napi_add(netdev, &enic->napi[0], enic_poll); break; case VNIC_DEV_INTR_MODE_MSIX: 
- for (i = 0; i < enic->rq_count; i++) + for (i = 0; i < enic->rq_count; i++) { netif_napi_add(netdev, &enic->napi[i], - enic_poll_msix, 64); + enic_poll_msix_rq); + } + for (i = 0; i < enic->wq_count; i++) + netif_napi_add(netdev, + &enic->napi[enic_cq_wq(enic, i)], + enic_poll_msix_wq); break; } return 0; err_out_free_vnic_resources: + enic_free_affinity_hint(enic); enic_clear_intr_mode(enic); enic_free_vnic_resources(enic); + enic_free_enic_resources(enic); return err; } @@ -2297,7 +2638,8 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * instance data is initialized to zero. */ - netdev = alloc_etherdev(sizeof(struct enic)); + netdev = alloc_etherdev_mqs(sizeof(struct enic), + ENIC_RQ_MAX, ENIC_WQ_MAX); if (!netdev) return -ENOMEM; @@ -2327,30 +2669,18 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); /* Query PCI controller on system for DMA addressing - * limitation for the device. Try 40-bit first, and + * limitation for the device. Try 47-bit first, and * fail to 32-bit. */ - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(47)); if (err) { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { dev_err(dev, "No usable DMA configuration, aborting\n"); goto err_out_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) { - dev_err(dev, "Unable to obtain %u-bit DMA " - "for consistent allocations, aborting\n", 32); - goto err_out_release_regions; - } } else { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); - if (err) { - dev_err(dev, "Unable to obtain %u-bit DMA " - "for consistent allocations, aborting\n", 40); - goto err_out_release_regions; - } using_dac = 1; } @@ -2381,6 +2711,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_iounmap; } + err = vnic_devcmd_init(enic->vdev); + + if (err) + goto err_out_vnic_unregister; + #ifdef CONFIG_PCI_IOV /* Get number of subvnics */ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); @@ -2421,6 +2756,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ spin_lock_init(&enic->devcmd_lock); + spin_lock_init(&enic->enic_api_lock); /* * Set ingress vlan rewrite mode before vnic initialization @@ -2462,24 +2798,26 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_dev_close; } + netif_set_real_num_tx_queues(netdev, enic->wq_count); + netif_set_real_num_rx_queues(netdev, enic->rq_count); + /* Setup notification timer, HW reset task, and wq locks */ - init_timer(&enic->notify_timer); - enic->notify_timer.function = enic_notify_timer; - enic->notify_timer.data = (unsigned long)enic; + timer_setup(&enic->notify_timer, enic_notify_timer, 0); + enic_rfs_flw_tbl_init(enic); INIT_WORK(&enic->reset, enic_reset); + INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work); for (i = 0; i < enic->wq_count; i++) - spin_lock_init(&enic->wq_lock[i]); + spin_lock_init(&enic->wq[i].lock); /* Register net device */ enic->port_mtu = enic->config.mtu; - (void)enic_change_mtu(netdev, enic->port_mtu); err = enic_set_mac_addr(netdev, enic->mac_addr); if (err) { @@ -2488,15 +2826,19 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } enic->tx_coalesce_usecs = enic->config.intr_timer_usec; + /* rx coalesce time 
already got initialized. This gets used + * if adaptive coal is turned off + */ enic->rx_coalesce_usecs = enic->tx_coalesce_usecs; if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) netdev->netdev_ops = &enic_netdev_dynamic_ops; else netdev->netdev_ops = &enic_netdev_ops; + netdev->stat_ops = &enic_netdev_stat_ops; netdev->watchdog_timeo = 2 * HZ; - netdev->ethtool_ops = &enic_ethtool_ops; + enic_set_ethtool_ops(netdev); netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (ENIC_SETTING(enic, LOOP)) { @@ -2510,16 +2852,71 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ENIC_SETTING(enic, TSO)) netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN; + if (ENIC_SETTING(enic, RSS)) + netdev->hw_features |= NETIF_F_RXHASH; if (ENIC_SETTING(enic, RXCSUM)) netdev->hw_features |= NETIF_F_RXCSUM; + if (ENIC_SETTING(enic, VXLAN)) { + u64 patch_level; + u64 a1 = 0; + + netdev->hw_enc_features |= NETIF_F_RXCSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_HW_CSUM | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features |= netdev->hw_enc_features; + /* get bit mask from hw about supported offload bit level + * BIT(0) = fw supports patch_level 0 + * fcoe bit = encap + * fcoe_fc_crc_ok = outer csum ok + * BIT(1) = always set by fw + * BIT(2) = fw supports patch_level 2 + * BIT(0) in rss_hash = encap + * BIT(1,2) in rss_hash = outer_ip_csum_ok/ + * outer_tcp_csum_ok + * used in enic_rq_indicate_buf + */ + err = vnic_dev_get_supported_feature_ver(enic->vdev, + VIC_FEATURE_VXLAN, + &patch_level, &a1); + if (err) + patch_level = 0; + enic->vxlan.flags = (u8)a1; + /* mask bits that are supported by driver + */ + patch_level &= BIT_ULL(0) | BIT_ULL(2); + patch_level = fls(patch_level); + patch_level = patch_level ? 
patch_level - 1 : 0; + enic->vxlan.patch_level = patch_level; + + if (vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) == 1 || + enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ) { + netdev->udp_tunnel_nic_info = &enic_udp_tunnels_v4; + if (enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6) + netdev->udp_tunnel_nic_info = &enic_udp_tunnels; + } + } netdev->features |= netdev->hw_features; + netdev->vlan_features |= netdev->features; + +#ifdef CONFIG_RFS_ACCEL + netdev->hw_features |= NETIF_F_NTUPLE; +#endif if (using_dac) netdev->features |= NETIF_F_HIGHDMA; netdev->priv_flags |= IFF_UNICAST_FLT; + /* MTU range: 68 - 9000 */ + netdev->min_mtu = ENIC_MIN_MTU; + netdev->max_mtu = ENIC_MAX_MTU; + netdev->mtu = enic->port_mtu; + err = register_netdev(netdev); if (err) { dev_err(dev, "Cannot register net device, aborting\n"); @@ -2540,8 +2937,8 @@ err_out_disable_sriov_pp: pci_disable_sriov(pdev); enic->priv_flags &= ~ENIC_SRIOV_ENABLED; } -err_out_vnic_unregister: #endif +err_out_vnic_unregister: vnic_dev_unregister(enic->vdev); err_out_iounmap: enic_iounmap(enic); @@ -2550,7 +2947,6 @@ err_out_release_regions: err_out_disable_device: pci_disable_device(pdev); err_out_free_netdev: - pci_set_drvdata(pdev, NULL); free_netdev(netdev); return err; @@ -2579,7 +2975,6 @@ static void enic_remove(struct pci_dev *pdev) enic_iounmap(enic); pci_release_regions(pdev); pci_disable_device(pdev); - pci_set_drvdata(pdev, NULL); free_netdev(netdev); } } @@ -2591,17 +2986,4 @@ static struct pci_driver enic_driver = { .remove = enic_remove, }; -static int __init enic_init_module(void) -{ - pr_info("%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION); - - return pci_register_driver(&enic_driver); -} - -static void __exit enic_cleanup_module(void) -{ - pci_unregister_driver(&enic_driver); -} - -module_init(enic_init_module); -module_exit(enic_cleanup_module); +module_pci_driver(enic_driver); |
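A note on the per-RQ page pool created in the enic_open() hunk: each receive queue gets its own page_pool that owns DMA mapping (PP_FLAG_DMA_MAP) and device-direction syncing (PP_FLAG_DMA_SYNC_DEV), sized to the ring and allocated NUMA-local to the adapter. The following standalone sketch mirrors that parameter block; the helper name my_rx_pool_create() and its arguments are illustrative assumptions, not enic driver API.

#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <net/page_pool/helpers.h>

/* Build one pool per RX ring, as the enic_open() hunk above does. */
static struct page_pool *my_rx_pool_create(struct device *dma_dev,
					   struct napi_struct *napi,
					   unsigned int ring_size,
					   unsigned int buf_len)
{
	struct page_pool_params pp = {
		.order		= get_order(buf_len),	/* pages per RX buffer */
		.pool_size	= ring_size,		/* one buffer per descriptor */
		.nid		= dev_to_node(dma_dev),	/* NUMA-local allocation */
		.dev		= dma_dev,		/* device doing the DMA */
		.napi		= napi,			/* enables direct recycling */
		.dma_dir	= DMA_FROM_DEVICE,
		.max_len	= max_t(unsigned int, buf_len, PAGE_SIZE),
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
	};

	return page_pool_create(&pp);	/* returns ERR_PTR() on failure */
}

With such a pool in place, refill paths draw buffers with page_pool_alloc_pages(pool, GFP_ATOMIC) and return them with page_pool_put_full_page(), and teardown calls page_pool_destroy(), which is the shape of the enic_stop() and error-path hunks above.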
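For reference on the final hunk: module_pci_driver() (from <linux/pci.h>) generates roughly the boilerplate that the deleted enic_init_module()/enic_cleanup_module() pair spelled out by hand, minus the version banner (DRV_VERSION went away along with MODULE_VERSION). A sketch of the approximate expansion, assuming the enic_driver definition earlier in the file; the generated function names are illustrative:

#include <linux/module.h>
#include <linux/pci.h>

/* assumes: static struct pci_driver enic_driver = { ... }; */
static int __init enic_driver_init(void)
{
	return pci_register_driver(&enic_driver);
}
module_init(enic_driver_init);

static void __exit enic_driver_exit(void)
{
	pci_unregister_driver(&enic_driver);
}
module_exit(enic_driver_exit);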