diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-15 09:00:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-15 09:00:47 -0700 |
commit | 6c373ca89399c5a3f7ef210ad8f63dc3437da345 (patch) | |
tree | 74d1ec65087df1da1021b43ac51acc1ee8601809 /drivers/net/ethernet/emulex | |
parent | bb0fd7ab0986105765d11baa82e619c618a235aa (diff) | |
parent | 9f9151412dd7aae0e3f51a89ae4a1f8755fdb4d0 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Add BQL support to via-rhine, from Tino Reichardt.
2) Integrate SWITCHDEV layer support into the DSA layer, so DSA drivers
can support hw switch offloading. From Floria Fainelli.
3) Allow 'ip address' commands to initiate multicast group join/leave,
from Madhu Challa.
4) Many ipv4 FIB lookup optimizations from Alexander Duyck.
5) Support EBPF in cls_bpf classifier and act_bpf action, from Daniel
Borkmann.
6) Remove the ugly compat support in ARP for ugly layers like ax25,
rose, etc. And use this to clean up the neigh layer, then use it to
implement MPLS support. All from Eric Biederman.
7) Support L3 forwarding offloading in switches, from Scott Feldman.
8) Collapse the LOCAL and MAIN ipv4 FIB tables when possible, to speed
up route lookups even further. From Alexander Duyck.
9) Many improvements and bug fixes to the rhashtable implementation,
from Herbert Xu and Thomas Graf. In particular, in the case where
an rhashtable user bulk adds a large number of items into an empty
table, we expand the table much more sanely.
10) Don't make the tcp_metrics hash table per-namespace, from Eric
Biederman.
11) Extend EBPF to access SKB fields, from Alexei Starovoitov.
12) Split out new connection request sockets so that they can be
established in the main hash table. Much less false sharing since
hash lookups go direct to the request sockets instead of having to
go first to the listener then to the request socks hashed
underneath. From Eric Dumazet.
13) Add async I/O support for crytpo AF_ALG sockets, from Tadeusz Struk.
14) Support stable privacy address generation for RFC7217 in IPV6. From
Hannes Frederic Sowa.
15) Hash network namespace into IP frag IDs, also from Hannes Frederic
Sowa.
16) Convert PTP get/set methods to use 64-bit time, from Richard
Cochran.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1816 commits)
fm10k: Bump driver version to 0.15.2
fm10k: corrected VF multicast update
fm10k: mbx_update_max_size does not drop all oversized messages
fm10k: reset head instead of calling update_max_size
fm10k: renamed mbx_tx_dropped to mbx_tx_oversized
fm10k: update xcast mode before synchronizing multicast addresses
fm10k: start service timer on probe
fm10k: fix function header comment
fm10k: comment next_vf_mbx flow
fm10k: don't handle mailbox events in iov_event path and always process mailbox
fm10k: use separate workqueue for fm10k driver
fm10k: Set PF queues to unlimited bandwidth during virtualization
fm10k: expose tx_timeout_count as an ethtool stat
fm10k: only increment tx_timeout_count in Tx hang path
fm10k: remove extraneous "Reset interface" message
fm10k: separate PF only stats so that VF does not display them
fm10k: use hw->mac.max_queues for stats
fm10k: only show actual queues, not the maximum in hardware
fm10k: allow creation of VLAN on default vid
fm10k: fix unused warnings
...
Diffstat (limited to 'drivers/net/ethernet/emulex')
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be.h | 57 | ||||
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_cmds.c | 203 | ||||
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_cmds.h | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_ethtool.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_main.c | 1200 |
5 files changed, 843 insertions, 642 deletions
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 27b9fe99a9bd..1bf1cdce74ac 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -30,11 +30,12 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/u64_stats_sync.h> +#include <linux/cpumask.h> #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "10.4u" +#define DRV_VER "10.6.0.1" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" @@ -87,6 +88,7 @@ #define BE3_MAX_EVT_QS 16 #define BE3_SRIOV_MAX_EVT_QS 8 +#define MAX_RSS_IFACES 15 #define MAX_RX_QS 32 #define MAX_EVT_QS 32 #define MAX_TX_QS 32 @@ -97,6 +99,7 @@ #define BE_NAPI_WEIGHT 64 #define MAX_RX_POST BE_NAPI_WEIGHT /* Frags posted at a time */ #define RX_FRAGS_REFILL_WM (RX_Q_LEN - MAX_RX_POST) +#define MAX_NUM_POST_ERX_DB 255u #define MAX_VFS 30 /* Max VFs supported by BE3 FW */ #define FW_VER_LEN 32 @@ -182,6 +185,7 @@ struct be_eq_obj { u16 spurious_intr; struct napi_struct napi; struct be_adapter *adapter; + cpumask_var_t affinity_mask; #ifdef CONFIG_NET_RX_BUSY_POLL #define BE_EQ_IDLE 0 @@ -238,10 +242,17 @@ struct be_tx_stats { struct u64_stats_sync sync_compl; }; +/* Structure to hold some data of interest obtained from a TX CQE */ +struct be_tx_compl_info { + u8 status; /* Completion status */ + u16 end_index; /* Completed TXQ Index */ +}; + struct be_tx_obj { u32 db_offset; struct be_queue_info q; struct be_queue_info cq; + struct be_tx_compl_info txcp; /* Remember the skbs that were transmitted */ struct sk_buff *sent_skb_list[TX_Q_LEN]; struct be_tx_stats stats; @@ -370,6 +381,7 @@ enum vf_state { #define BE_FLAGS_VXLAN_OFFLOADS BIT(8) #define BE_FLAGS_SETUP_DONE BIT(9) #define BE_FLAGS_EVT_INCOMPATIBLE_SFP BIT(10) +#define BE_FLAGS_ERR_DETECTION_SCHEDULED BIT(11) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 @@ -404,8 +416,11 @@ struct be_resources { u16 max_tx_qs; u16 max_rss_qs; u16 max_rx_qs; + u16 max_cq_count; u16 max_uc_mac; /* Max UC MACs programmable */ u16 max_vlans; /* Number of vlans supported */ + u16 max_iface_count; + u16 max_mcc_count; u16 max_evt_qs; u32 if_cap_flags; u32 vf_if_cap_flags; /* VF if capability flags */ @@ -418,6 +433,39 @@ struct rss_info { u8 rss_hkey[RSS_HASH_KEY_LEN]; }; +/* Macros to read/write the 'features' word of be_wrb_params structure. + */ +#define BE_WRB_F_BIT(name) BE_WRB_F_##name##_BIT +#define BE_WRB_F_MASK(name) BIT_MASK(BE_WRB_F_##name##_BIT) + +#define BE_WRB_F_GET(word, name) \ + (((word) & (BE_WRB_F_MASK(name))) >> BE_WRB_F_BIT(name)) + +#define BE_WRB_F_SET(word, name, val) \ + ((word) |= (((val) << BE_WRB_F_BIT(name)) & BE_WRB_F_MASK(name))) + +/* Feature/offload bits */ +enum { + BE_WRB_F_CRC_BIT, /* Ethernet CRC */ + BE_WRB_F_IPCS_BIT, /* IP csum */ + BE_WRB_F_TCPCS_BIT, /* TCP csum */ + BE_WRB_F_UDPCS_BIT, /* UDP csum */ + BE_WRB_F_LSO_BIT, /* LSO */ + BE_WRB_F_LSO6_BIT, /* LSO6 */ + BE_WRB_F_VLAN_BIT, /* VLAN */ + BE_WRB_F_VLAN_SKIP_HW_BIT /* Skip VLAN tag (workaround) */ +}; + +/* The structure below provides a HW-agnostic abstraction of WRB params + * retrieved from a TX skb. This is in turn passed to chip specific routines + * during transmit, to set the corresponding params in the WRB. + */ +struct be_wrb_params { + u32 features; /* Feature bits */ + u16 vlan_tag; /* VLAN tag */ + u16 lso_mss; /* MSS for LSO */ +}; + struct be_adapter { struct pci_dev *pdev; struct net_device *netdev; @@ -449,6 +497,8 @@ struct be_adapter { /* Rx rings */ u16 num_rx_qs; + u16 num_rss_qs; + u16 need_def_rxq; struct be_rx_obj rx_obj[MAX_RX_QS]; u32 big_page_size; /* Compounded page size shared by rx wrbs */ @@ -463,7 +513,7 @@ struct be_adapter { struct delayed_work work; u16 work_counter; - struct delayed_work func_recovery_work; + struct delayed_work be_err_detection_work; u32 flags; u32 cmd_privileges; /* Ethtool knobs and info */ @@ -596,9 +646,8 @@ extern const struct ethtool_ops be_ethtool_ops; for (i = 0, rxo = &adapter->rx_obj[i]; i < adapter->num_rx_qs; \ i++, rxo++) -/* Skip the default non-rss queue (last one)*/ #define for_all_rss_queues(adapter, rxo, i) \ - for (i = 0, rxo = &adapter->rx_obj[i]; i < (adapter->num_rx_qs - 1);\ + for (i = 0, rxo = &adapter->rx_obj[i]; i < adapter->num_rss_qs; \ i++, rxo++) #define for_all_tx_queues(adapter, txo, i) \ diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 7f05f309e935..fb140faeafb1 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -635,73 +635,16 @@ static int lancer_wait_ready(struct be_adapter *adapter) for (i = 0; i < SLIPORT_READY_TIMEOUT; i++) { sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); if (sliport_status & SLIPORT_STATUS_RDY_MASK) - break; - - msleep(1000); - } - - if (i == SLIPORT_READY_TIMEOUT) - return sliport_status ? : -1; - - return 0; -} - -static bool lancer_provisioning_error(struct be_adapter *adapter) -{ - u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; - - sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); - if (sliport_status & SLIPORT_STATUS_ERR_MASK) { - sliport_err1 = ioread32(adapter->db + SLIPORT_ERROR1_OFFSET); - sliport_err2 = ioread32(adapter->db + SLIPORT_ERROR2_OFFSET); - - if (sliport_err1 == SLIPORT_ERROR_NO_RESOURCE1 && - sliport_err2 == SLIPORT_ERROR_NO_RESOURCE2) - return true; - } - return false; -} - -int lancer_test_and_set_rdy_state(struct be_adapter *adapter) -{ - int status; - u32 sliport_status, err, reset_needed; - bool resource_error; + return 0; - resource_error = lancer_provisioning_error(adapter); - if (resource_error) - return -EAGAIN; + if (sliport_status & SLIPORT_STATUS_ERR_MASK && + !(sliport_status & SLIPORT_STATUS_RN_MASK)) + return -EIO; - status = lancer_wait_ready(adapter); - if (!status) { - sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); - err = sliport_status & SLIPORT_STATUS_ERR_MASK; - reset_needed = sliport_status & SLIPORT_STATUS_RN_MASK; - if (err && reset_needed) { - iowrite32(SLI_PORT_CONTROL_IP_MASK, - adapter->db + SLIPORT_CONTROL_OFFSET); - - /* check if adapter has corrected the error */ - status = lancer_wait_ready(adapter); - sliport_status = ioread32(adapter->db + - SLIPORT_STATUS_OFFSET); - sliport_status &= (SLIPORT_STATUS_ERR_MASK | - SLIPORT_STATUS_RN_MASK); - if (status || sliport_status) - status = -1; - } else if (err || reset_needed) { - status = -1; - } + msleep(1000); } - /* Stop error recovery if error is not recoverable. - * No resource error is temporary errors and will go away - * when PF provisions resources. - */ - resource_error = lancer_provisioning_error(adapter); - if (resource_error) - status = -EAGAIN; - return status; + return sliport_status ? : -1; } int be_fw_wait_ready(struct be_adapter *adapter) @@ -720,6 +663,10 @@ int be_fw_wait_ready(struct be_adapter *adapter) } do { + /* There's no means to poll POST state on BE2/3 VFs */ + if (BEx_chip(adapter) && be_virtfn(adapter)) + return 0; + stage = be_POST_stage_get(adapter); if (stage == POST_STAGE_ARMFW_RDY) return 0; @@ -734,7 +681,7 @@ int be_fw_wait_ready(struct be_adapter *adapter) err: dev_err(dev, "POST timeout; stage=%#x\n", stage); - return -1; + return -ETIMEDOUT; } static inline struct be_sge *nonembedded_sgl(struct be_mcc_wrb *wrb) @@ -2123,16 +2070,12 @@ int be_cmd_reset_function(struct be_adapter *adapter) int status; if (lancer_chip(adapter)) { + iowrite32(SLI_PORT_CONTROL_IP_MASK, + adapter->db + SLIPORT_CONTROL_OFFSET); status = lancer_wait_ready(adapter); - if (!status) { - iowrite32(SLI_PORT_CONTROL_IP_MASK, - adapter->db + SLIPORT_CONTROL_OFFSET); - status = lancer_test_and_set_rdy_state(adapter); - } - if (status) { + if (status) dev_err(&adapter->pdev->dev, "Adapter in non recoverable error\n"); - } return status; } @@ -3075,7 +3018,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, mac_count = resp->true_mac_count + resp->pseudo_mac_count; /* Mac list returned could contain one or more active mac_ids - * or one or more true or pseudo permanant mac addresses. + * or one or more true or pseudo permanent mac addresses. * If an active mac_id is present, return first active mac_id * found. */ @@ -3130,7 +3073,7 @@ int be_cmd_get_perm_mac(struct be_adapter *adapter, u8 *mac) int status; bool pmac_valid = false; - memset(mac, 0, ETH_ALEN); + eth_zero_addr(mac); if (BEx_chip(adapter)) { if (be_physfn(adapter)) @@ -3631,12 +3574,12 @@ static void be_copy_nic_desc(struct be_resources *res, res->max_rss_qs = le16_to_cpu(desc->rssq_count); res->max_rx_qs = le16_to_cpu(desc->rq_count); res->max_evt_qs = le16_to_cpu(desc->eq_count); + res->max_cq_count = le16_to_cpu(desc->cq_count); + res->max_iface_count = le16_to_cpu(desc->iface_count); + res->max_mcc_count = le16_to_cpu(desc->mcc_count); /* Clear flags that driver is not interested in */ res->if_cap_flags = le32_to_cpu(desc->cap_flags) & BE_IF_CAP_FLAGS_WANT; - /* Need 1 RXQ as the default RXQ */ - if (res->max_rss_qs && res->max_rss_qs == res->max_rx_qs) - res->max_rss_qs -= 1; } /* Uses Mbox */ @@ -3698,7 +3641,7 @@ err: /* Will use MBOX only if MCCQ has not been created */ int be_cmd_get_profile_config(struct be_adapter *adapter, - struct be_resources *res, u8 domain) + struct be_resources *res, u8 query, u8 domain) { struct be_cmd_resp_get_profile_config *resp; struct be_cmd_req_get_profile_config *req; @@ -3708,7 +3651,7 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, struct be_nic_res_desc *nic; struct be_mcc_wrb wrb = {0}; struct be_dma_mem cmd; - u32 desc_count; + u16 desc_count; int status; memset(&cmd, 0, sizeof(struct be_dma_mem)); @@ -3727,12 +3670,19 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, req->hdr.version = 1; req->type = ACTIVE_PROFILE_TYPE; + /* When QUERY_MODIFIABLE_FIELDS_TYPE bit is set, cmd returns the + * descriptors with all bits set to "1" for the fields which can be + * modified using SET_PROFILE_CONFIG cmd. + */ + if (query == RESOURCE_MODIFIABLE) + req->type |= QUERY_MODIFIABLE_FIELDS_TYPE; + status = be_cmd_notify_wait(adapter, &wrb); if (status) goto err; resp = cmd.va; - desc_count = le32_to_cpu(resp->desc_count); + desc_count = le16_to_cpu(resp->desc_count); pcie = be_get_pcie_desc(adapter->pdev->devfn, resp->func_param, desc_count); @@ -3857,23 +3807,80 @@ int be_cmd_config_qos(struct be_adapter *adapter, u32 max_rate, u16 link_speed, 1, version, domain); } +static void be_fill_vf_res_template(struct be_adapter *adapter, + struct be_resources pool_res, + u16 num_vfs, u16 num_vf_qs, + struct be_nic_res_desc *nic_vft) +{ + u32 vf_if_cap_flags = pool_res.vf_if_cap_flags; + struct be_resources res_mod = {0}; + + /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd, + * which are modifiable using SET_PROFILE_CONFIG cmd. + */ + be_cmd_get_profile_config(adapter, &res_mod, RESOURCE_MODIFIABLE, 0); + + /* If RSS IFACE capability flags are modifiable for a VF, set the + * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if + * more than 1 RSSQ is available for a VF. + * Otherwise, provision only 1 queue pair for VF. + */ + if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) { + nic_vft->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT); + if (num_vf_qs > 1) { + vf_if_cap_flags |= BE_IF_FLAGS_RSS; + if (pool_res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS) + vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS; + } else { + vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS | + BE_IF_FLAGS_DEFQ_RSS); + } + + nic_vft->cap_flags = cpu_to_le32(vf_if_cap_flags); + } else { + num_vf_qs = 1; + } + + nic_vft->rq_count = cpu_to_le16(num_vf_qs); + nic_vft->txq_count = cpu_to_le16(num_vf_qs); + nic_vft->rssq_count = cpu_to_le16(num_vf_qs); + nic_vft->cq_count = cpu_to_le16(pool_res.max_cq_count / + (num_vfs + 1)); + + /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally + * among the PF and it's VFs, if the fields are changeable + */ + if (res_mod.max_uc_mac == FIELD_MODIFIABLE) + nic_vft->unicast_mac_count = cpu_to_le16(pool_res.max_uc_mac / + (num_vfs + 1)); + + if (res_mod.max_vlans == FIELD_MODIFIABLE) + nic_vft->vlan_count = cpu_to_le16(pool_res.max_vlans / + (num_vfs + 1)); + + if (res_mod.max_iface_count == FIELD_MODIFIABLE) + nic_vft->iface_count = cpu_to_le16(pool_res.max_iface_count / + (num_vfs + 1)); + + if (res_mod.max_mcc_count == FIELD_MODIFIABLE) + nic_vft->mcc_count = cpu_to_le16(pool_res.max_mcc_count / + (num_vfs + 1)); +} + int be_cmd_set_sriov_config(struct be_adapter *adapter, - struct be_resources res, u16 num_vfs) + struct be_resources pool_res, u16 num_vfs, + u16 num_vf_qs) { struct { struct be_pcie_res_desc pcie; struct be_nic_res_desc nic_vft; } __packed desc; - u16 vf_q_count; - - if (BEx_chip(adapter) || lancer_chip(adapter)) - return 0; /* PF PCIE descriptor */ be_reset_pcie_desc(&desc.pcie); desc.pcie.hdr.desc_type = PCIE_RESOURCE_DESC_TYPE_V1; desc.pcie.hdr.desc_len = RESOURCE_DESC_SIZE_V1; - desc.pcie.flags = (1 << IMM_SHIFT) | (1 << NOSV_SHIFT); + desc.pcie.flags = BIT(IMM_SHIFT) | BIT(NOSV_SHIFT); desc.pcie.pf_num = adapter->pdev->devfn; desc.pcie.sriov_state = num_vfs ? 1 : 0; desc.pcie.num_vfs = cpu_to_le16(num_vfs); @@ -3882,32 +3889,12 @@ int be_cmd_set_sriov_config(struct be_adapter *adapter, be_reset_nic_desc(&desc.nic_vft); desc.nic_vft.hdr.desc_type = NIC_RESOURCE_DESC_TYPE_V1; desc.nic_vft.hdr.desc_len = RESOURCE_DESC_SIZE_V1; - desc.nic_vft.flags = (1 << VFT_SHIFT) | (1 << IMM_SHIFT) | - (1 << NOSV_SHIFT); + desc.nic_vft.flags = BIT(VFT_SHIFT) | BIT(IMM_SHIFT) | BIT(NOSV_SHIFT); desc.nic_vft.pf_num = adapter->pdev->devfn; desc.nic_vft.vf_num = 0; - if (num_vfs && res.vf_if_cap_flags & BE_IF_FLAGS_RSS) { - /* If number of VFs requested is 8 less than max supported, - * assign 8 queue pairs to the PF and divide the remaining - * resources evenly among the VFs - */ - if (num_vfs < (be_max_vfs(adapter) - 8)) - vf_q_count = (res.max_rss_qs - 8) / num_vfs; - else - vf_q_count = res.max_rss_qs / num_vfs; - - desc.nic_vft.rq_count = cpu_to_le16(vf_q_count); - desc.nic_vft.txq_count = cpu_to_le16(vf_q_count); - desc.nic_vft.rssq_count = cpu_to_le16(vf_q_count - 1); - desc.nic_vft.cq_count = cpu_to_le16(3 * vf_q_count); - } else { - desc.nic_vft.txq_count = cpu_to_le16(1); - desc.nic_vft.rq_count = cpu_to_le16(1); - desc.nic_vft.rssq_count = cpu_to_le16(0); - /* One CQ for each TX, RX and MCCQ */ - desc.nic_vft.cq_count = cpu_to_le16(3); - } + be_fill_vf_res_template(adapter, pool_res, num_vfs, num_vf_qs, + &desc.nic_vft); return be_cmd_set_profile_config(adapter, &desc, 2 * RESOURCE_DESC_SIZE_V1, 2, 1, 0); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index a7634a3f052a..1ec22300e254 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -588,14 +588,15 @@ enum be_if_flags { BE_IF_FLAGS_MCAST_PROMISCUOUS = 0x200, BE_IF_FLAGS_PASS_L2_ERRORS = 0x400, BE_IF_FLAGS_PASS_L3L4_ERRORS = 0x800, - BE_IF_FLAGS_MULTICAST = 0x1000 + BE_IF_FLAGS_MULTICAST = 0x1000, + BE_IF_FLAGS_DEFQ_RSS = 0x1000000 }; #define BE_IF_CAP_FLAGS_WANT (BE_IF_FLAGS_RSS | BE_IF_FLAGS_PROMISCUOUS |\ BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_VLAN_PROMISCUOUS |\ BE_IF_FLAGS_VLAN | BE_IF_FLAGS_MCAST_PROMISCUOUS |\ BE_IF_FLAGS_PASS_L3L4_ERRORS | BE_IF_FLAGS_MULTICAST |\ - BE_IF_FLAGS_UNTAGGED) + BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_DEFQ_RSS) #define BE_IF_FLAGS_ALL_PROMISCUOUS (BE_IF_FLAGS_PROMISCUOUS | \ BE_IF_FLAGS_VLAN_PROMISCUOUS |\ @@ -2021,6 +2022,7 @@ struct be_cmd_req_set_ext_fat_caps { #define PORT_RESOURCE_DESC_TYPE_V1 0x55 #define MAX_RESOURCE_DESC 264 +#define IF_CAPS_FLAGS_VALID_SHIFT 0 /* IF caps valid */ #define VFT_SHIFT 3 /* VF template */ #define IMM_SHIFT 6 /* Immediate */ #define NOSV_SHIFT 7 /* No save */ @@ -2131,20 +2133,28 @@ struct be_cmd_resp_get_func_config { u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE_V1]; }; -#define ACTIVE_PROFILE_TYPE 0x2 +enum { + RESOURCE_LIMITS, + RESOURCE_MODIFIABLE +}; + struct be_cmd_req_get_profile_config { struct be_cmd_req_hdr hdr; u8 rsvd; +#define ACTIVE_PROFILE_TYPE 0x2 +#define QUERY_MODIFIABLE_FIELDS_TYPE BIT(3) u8 type; u16 rsvd1; }; struct be_cmd_resp_get_profile_config { struct be_cmd_resp_hdr hdr; - u32 desc_count; + __le16 desc_count; + u16 rsvd; u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE_V1]; }; +#define FIELD_MODIFIABLE 0xFFFF struct be_cmd_req_set_profile_config { struct be_cmd_req_hdr hdr; u32 rsvd; @@ -2344,7 +2354,7 @@ int be_cmd_query_port_name(struct be_adapter *adapter); int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res); int be_cmd_get_profile_config(struct be_adapter *adapter, - struct be_resources *res, u8 domain); + struct be_resources *res, u8 query, u8 domain); int be_cmd_get_active_profile(struct be_adapter *adapter, u16 *profile); int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, int vf_num); @@ -2355,4 +2365,5 @@ int be_cmd_set_logical_link_config(struct be_adapter *adapter, int be_cmd_set_vxlan_port(struct be_adapter *adapter, __be16 port); int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op); int be_cmd_set_sriov_config(struct be_adapter *adapter, - struct be_resources res, u16 num_vfs); + struct be_resources res, u16 num_vfs, + u16 num_vf_qs); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 4d2de4700769..b765c24625bf 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1097,7 +1097,7 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter, return status; if (be_multi_rxq(adapter)) { - for (j = 0; j < 128; j += adapter->num_rx_qs - 1) { + for (j = 0; j < 128; j += adapter->num_rss_qs) { for_all_rss_queues(adapter, rxo, i) { if ((j + i) >= 128) break; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index e6b790f0d9dc..fb0bc3c3620e 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -30,6 +30,9 @@ MODULE_DESCRIPTION(DRV_DESC " " DRV_VER); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("GPL"); +/* num_vfs module param is obsolete. + * Use sysfs method to enable/disable VFs. + */ static unsigned int num_vfs; module_param(num_vfs, uint, S_IRUGO); MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize"); @@ -727,48 +730,86 @@ static u16 skb_ip_proto(struct sk_buff *skb) ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; } -static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr, - struct sk_buff *skb, u32 wrb_cnt, u32 len, - bool skip_hw_vlan) +static inline bool be_is_txq_full(struct be_tx_obj *txo) { - u16 vlan_tag, proto; + return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len; +} - memset(hdr, 0, sizeof(*hdr)); +static inline bool be_can_txq_wake(struct be_tx_obj *txo) +{ + return atomic_read(&txo->q.used) < txo->q.len / 2; +} - SET_TX_WRB_HDR_BITS(crc, hdr, 1); +static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo) +{ + return atomic_read(&txo->q.used) > txo->pend_wrb_cnt; +} + +static void be_get_wrb_params_from_skb(struct be_adapter *adapter, + struct sk_buff *skb, + struct be_wrb_params *wrb_params) +{ + u16 proto; if (skb_is_gso(skb)) { - SET_TX_WRB_HDR_BITS(lso, hdr, 1); - SET_TX_WRB_HDR_BITS(lso_mss, hdr, skb_shinfo(skb)->gso_size); + BE_WRB_F_SET(wrb_params->features, LSO, 1); + wrb_params->lso_mss = skb_shinfo(skb)->gso_size; if (skb_is_gso_v6(skb) && !lancer_chip(adapter)) - SET_TX_WRB_HDR_BITS(lso6, hdr, 1); + BE_WRB_F_SET(wrb_params->features, LSO6, 1); } else if (skb->ip_summed == CHECKSUM_PARTIAL) { if (skb->encapsulation) { - SET_TX_WRB_HDR_BITS(ipcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, IPCS, 1); proto = skb_inner_ip_proto(skb); } else { proto = skb_ip_proto(skb); } if (proto == IPPROTO_TCP) - SET_TX_WRB_HDR_BITS(tcpcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, TCPCS, 1); else if (proto == IPPROTO_UDP) - SET_TX_WRB_HDR_BITS(udpcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, UDPCS, 1); } if (skb_vlan_tag_present(skb)) { - SET_TX_WRB_HDR_BITS(vlan, hdr, 1); - vlan_tag = be_get_tx_vlan_tag(adapter, skb); - SET_TX_WRB_HDR_BITS(vlan_tag, hdr, vlan_tag); + BE_WRB_F_SET(wrb_params->features, VLAN, 1); + wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb); } - SET_TX_WRB_HDR_BITS(num_wrb, hdr, wrb_cnt); - SET_TX_WRB_HDR_BITS(len, hdr, len); + BE_WRB_F_SET(wrb_params->features, CRC, 1); +} - /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0 - * When this hack is not needed, the evt bit is set while ringing DB +static void wrb_fill_hdr(struct be_adapter *adapter, + struct be_eth_hdr_wrb *hdr, + struct be_wrb_params *wrb_params, + struct sk_buff *skb) +{ + memset(hdr, 0, sizeof(*hdr)); + + SET_TX_WRB_HDR_BITS(crc, hdr, + BE_WRB_F_GET(wrb_params->features, CRC)); + SET_TX_WRB_HDR_BITS(ipcs, hdr, + BE_WRB_F_GET(wrb_params->features, IPCS)); + SET_TX_WRB_HDR_BITS(tcpcs, hdr, + BE_WRB_F_GET(wrb_params->features, TCPCS)); + SET_TX_WRB_HDR_BITS(udpcs, hdr, + BE_WRB_F_GET(wrb_params->features, UDPCS)); + + SET_TX_WRB_HDR_BITS(lso, hdr, + BE_WRB_F_GET(wrb_params->features, LSO)); + SET_TX_WRB_HDR_BITS(lso6, hdr, + BE_WRB_F_GET(wrb_params->features, LSO6)); + SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss); + + /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this + * hack is not needed, the evt bit is set while ringing DB. */ - if (skip_hw_vlan) - SET_TX_WRB_HDR_BITS(event, hdr, 1); + SET_TX_WRB_HDR_BITS(event, hdr, + BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW)); + SET_TX_WRB_HDR_BITS(vlan, hdr, + BE_WRB_F_GET(wrb_params->features, VLAN)); + SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag); + + SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb)); + SET_TX_WRB_HDR_BITS(len, hdr, skb->len); } static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb, @@ -788,77 +829,124 @@ static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb, } } -/* Returns the number of WRBs used up by the skb */ +/* Grab a WRB header for xmit */ +static u16 be_tx_get_wrb_hdr(struct be_tx_obj *txo) +{ + u16 head = txo->q.head; + + queue_head_inc(&txo->q); + return head; +} + +/* Set up the WRB header for xmit */ +static void be_tx_setup_wrb_hdr(struct be_adapter *adapter, + struct be_tx_obj *txo, + struct be_wrb_params *wrb_params, + struct sk_buff *skb, u16 head) +{ + u32 num_frags = skb_wrb_cnt(skb); + struct be_queue_info *txq = &txo->q; + struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head); + + wrb_fill_hdr(adapter, hdr, wrb_params, skb); + be_dws_cpu_to_le(hdr, sizeof(*hdr)); + + BUG_ON(txo->sent_skb_list[head]); + txo->sent_skb_list[head] = skb; + txo->last_req_hdr = head; + atomic_add(num_frags, &txq->used); + txo->last_req_wrb_cnt = num_frags; + txo->pend_wrb_cnt += num_frags; +} + +/* Setup a WRB fragment (buffer descriptor) for xmit */ +static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr, + int len) +{ + struct be_eth_wrb *wrb; + struct be_queue_info *txq = &txo->q; + + wrb = queue_head_node(txq); + wrb_fill(wrb, busaddr, len); + queue_head_inc(txq); +} + +/* Bring the queue back to the state it was in before be_xmit_enqueue() routine + * was invoked. The producer index is restored to the previous packet and the + * WRBs of the current packet are unmapped. Invoked to handle tx setup errors. + */ +static void be_xmit_restore(struct be_adapter *adapter, + struct be_tx_obj *txo, u16 head, bool map_single, + u32 copied) +{ + struct device *dev; + struct be_eth_wrb *wrb; + struct be_queue_info *txq = &txo->q; + + dev = &adapter->pdev->dev; + txq->head = head; + + /* skip the first wrb (hdr); it's not mapped */ + queue_head_inc(txq); + while (copied) { + wrb = queue_head_node(txq); + unmap_tx_frag(dev, wrb, map_single); + map_single = false; + copied -= le32_to_cpu(wrb->frag_len); + queue_head_inc(txq); + } + + txq->head = head; +} + +/* Enqueue the given packet for transmit. This routine allocates WRBs for the + * packet, dma maps the packet buffers and sets up the WRBs. Returns the number + * of WRBs used up by the packet. + */ static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo, - struct sk_buff *skb, bool skip_hw_vlan) + struct sk_buff *skb, + struct be_wrb_params *wrb_params) { u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb); struct device *dev = &adapter->pdev->dev; struct be_queue_info *txq = &txo->q; - struct be_eth_hdr_wrb *hdr; bool map_single = false; - struct be_eth_wrb *wrb; - dma_addr_t busaddr; u16 head = txq->head; + dma_addr_t busaddr; + int len; - hdr = queue_head_node(txq); - wrb_fill_hdr(adapter, hdr, skb, wrb_cnt, skb->len, skip_hw_vlan); - be_dws_cpu_to_le(hdr, sizeof(*hdr)); - - queue_head_inc(txq); + head = be_tx_get_wrb_hdr(txo); if (skb->len > skb->data_len) { - int len = skb_headlen(skb); + len = skb_headlen(skb); busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, busaddr)) goto dma_err; map_single = true; - wrb = queue_head_node(txq); - wrb_fill(wrb, busaddr, len); - queue_head_inc(txq); + be_tx_setup_wrb_frag(txo, busaddr, len); copied += len; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + len = skb_frag_size(frag); - busaddr = skb_frag_dma_map(dev, frag, 0, - skb_frag_size(frag), DMA_TO_DEVICE); + busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, busaddr)) goto dma_err; - wrb = queue_head_node(txq); - wrb_fill(wrb, busaddr, skb_frag_size(frag)); - queue_head_inc(txq); - copied += skb_frag_size(frag); + be_tx_setup_wrb_frag(txo, busaddr, len); + copied += len; } - BUG_ON(txo->sent_skb_list[head]); - txo->sent_skb_list[head] = skb; - txo->last_req_hdr = head; - atomic_add(wrb_cnt, &txq->used); - txo->last_req_wrb_cnt = wrb_cnt; - txo->pend_wrb_cnt += wrb_cnt; + be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head); be_tx_stats_update(txo, skb); return wrb_cnt; dma_err: - /* Bring the queue back to the state it was in before this - * routine was invoked. - */ - txq->head = head; - /* skip the first wrb (hdr); it's not mapped */ - queue_head_inc(txq); - while (copied) { - wrb = queue_head_node(txq); - unmap_tx_frag(dev, wrb, map_single); - map_single = false; - copied -= le32_to_cpu(wrb->frag_len); - adapter->drv_stats.dma_map_errors++; - queue_head_inc(txq); - } - txq->head = head; + adapter->drv_stats.dma_map_errors++; + be_xmit_restore(adapter, txo, head, map_single, copied); return 0; } @@ -869,7 +957,8 @@ static inline int qnq_async_evt_rcvd(struct be_adapter *adapter) static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params + *wrb_params) { u16 vlan_tag = 0; @@ -886,8 +975,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to * skip VLAN insertion */ - if (skip_hw_vlan) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); } if (vlan_tag) { @@ -905,8 +993,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, vlan_tag); if (unlikely(!skb)) return skb; - if (skip_hw_vlan) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); } return skb; @@ -946,7 +1033,8 @@ static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb) static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params + *wrb_params) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; unsigned int eth_hdr_len; @@ -970,7 +1058,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (be_pvid_tagging_enabled(adapter) && veh->h_vlan_proto == htons(ETH_P_8021Q)) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); /* HW has a bug wherein it will calculate CSUM for VLAN * pkts even though it is disabled. @@ -978,7 +1066,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (skb->ip_summed != CHECKSUM_PARTIAL && skb_vlan_tag_present(skb)) { - skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); + skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params); if (unlikely(!skb)) goto err; } @@ -1000,7 +1088,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (be_ipv6_tx_stall_chk(adapter, skb) && be_vlan_tag_tx_chk(adapter, skb)) { - skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); + skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params); if (unlikely(!skb)) goto err; } @@ -1014,7 +1102,7 @@ err: static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params *wrb_params) { /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or * less may cause a transmit stall on that port. So the work-around is @@ -1026,7 +1114,7 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, } if (BEx_chip(adapter) || lancer_chip(adapter)) { - skb = be_lancer_xmit_workarounds(adapter, skb, skip_hw_vlan); + skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params); if (!skb) return NULL; } @@ -1060,24 +1148,26 @@ static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo) static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) { - bool skip_hw_vlan = false, flush = !skb->xmit_more; struct be_adapter *adapter = netdev_priv(netdev); u16 q_idx = skb_get_queue_mapping(skb); struct be_tx_obj *txo = &adapter->tx_obj[q_idx]; - struct be_queue_info *txq = &txo->q; + struct be_wrb_params wrb_params = { 0 }; + bool flush = !skb->xmit_more; u16 wrb_cnt; - skb = be_xmit_workarounds(adapter, skb, &skip_hw_vlan); + skb = be_xmit_workarounds(adapter, skb, &wrb_params); if (unlikely(!skb)) goto drop; - wrb_cnt = be_xmit_enqueue(adapter, txo, skb, skip_hw_vlan); + be_get_wrb_params_from_skb(adapter, skb, &wrb_params); + + wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); if (unlikely(!wrb_cnt)) { dev_kfree_skb_any(skb); goto drop; } - if ((atomic_read(&txq->used) + BE_MAX_TX_FRAG_COUNT) >= txq->len) { + if (be_is_txq_full(txo)) { netif_stop_subqueue(netdev, q_idx); tx_stats(txo)->tx_stops++; } @@ -2032,7 +2122,7 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed) if (rxo->rx_post_starved) rxo->rx_post_starved = false; do { - notify = min(256u, posted); + notify = min(MAX_NUM_POST_ERX_DB, posted); be_rxq_notify(adapter, rxq->id, notify); posted -= notify; } while (posted); @@ -2042,18 +2132,23 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed) } } -static struct be_eth_tx_compl *be_tx_compl_get(struct be_queue_info *tx_cq) +static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo) { - struct be_eth_tx_compl *txcp = queue_tail_node(tx_cq); + struct be_queue_info *tx_cq = &txo->cq; + struct be_tx_compl_info *txcp = &txo->txcp; + struct be_eth_tx_compl *compl = queue_tail_node(tx_cq); - if (txcp->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0) + if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0) return NULL; + /* Ensure load ordering of valid bit dword and other dwords below */ rmb(); - be_dws_le_to_cpu(txcp, sizeof(*txcp)); + be_dws_le_to_cpu(compl, sizeof(*compl)); - txcp->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0; + txcp->status = GET_TX_COMPL_BITS(status, compl); + txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl); + compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0; queue_tail_inc(tx_cq); return txcp; } @@ -2174,9 +2269,9 @@ static void be_tx_compl_clean(struct be_adapter *adapter) { u16 end_idx, notified_idx, cmpl = 0, timeo = 0, num_wrbs = 0; struct device *dev = &adapter->pdev->dev; - struct be_tx_obj *txo; + struct be_tx_compl_info *txcp; struct be_queue_info *txq; - struct be_eth_tx_compl *txcp; + struct be_tx_obj *txo; int i, pending_txqs; /* Stop polling for compls when HW has been silent for 10ms */ @@ -2187,10 +2282,10 @@ static void be_tx_compl_clean(struct be_adapter *adapter) cmpl = 0; num_wrbs = 0; txq = &txo->q; - while ((txcp = be_tx_compl_get(&txo->cq))) { - end_idx = GET_TX_COMPL_BITS(wrb_index, txcp); - num_wrbs += be_tx_compl_process(adapter, txo, - end_idx); + while ((txcp = be_tx_compl_get(txo))) { + num_wrbs += + be_tx_compl_process(adapter, txo, + txcp->end_index); cmpl++; } if (cmpl) { @@ -2198,7 +2293,7 @@ static void be_tx_compl_clean(struct be_adapter *adapter) atomic_sub(num_wrbs, &txq->used); timeo = 0; } - if (atomic_read(&txq->used) == txo->pend_wrb_cnt) + if (!be_is_tx_compl_pending(txo)) pending_txqs--; } @@ -2247,6 +2342,7 @@ static void be_evt_queues_destroy(struct be_adapter *adapter) napi_hash_del(&eqo->napi); netif_napi_del(&eqo->napi); } + free_cpumask_var(eqo->affinity_mask); be_queue_free(adapter, &eqo->q); } } @@ -2262,6 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), + eqo->affinity_mask); + netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); @@ -2353,8 +2454,9 @@ static void be_tx_queues_destroy(struct be_adapter *adapter) static int be_tx_qs_create(struct be_adapter *adapter) { - struct be_queue_info *cq, *eq; + struct be_queue_info *cq; struct be_tx_obj *txo; + struct be_eq_obj *eqo; int status, i; adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter)); @@ -2372,8 +2474,8 @@ static int be_tx_qs_create(struct be_adapter *adapter) /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ - eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; - status = be_cmd_cq_create(adapter, cq, eq, false, 3); + eqo = &adapter->eq_obj[i % adapter->num_evt_qs]; + status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3); if (status) return status; @@ -2385,6 +2487,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) status = be_cmd_txq_create(adapter, txo); if (status) return status; + + netif_set_xps_queue(adapter->netdev, eqo->affinity_mask, + eqo->idx); } dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n", @@ -2413,13 +2518,19 @@ static int be_rx_cqs_create(struct be_adapter *adapter) int rc, i; /* We can create as many RSS rings as there are EQs. */ - adapter->num_rx_qs = adapter->num_evt_qs; + adapter->num_rss_qs = adapter->num_evt_qs; + + /* We'll use RSS only if atleast 2 RSS rings are supported. */ + if (adapter->num_rss_qs <= 1) + adapter->num_rss_qs = 0; + + adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq; - /* We'll use RSS only if atleast 2 RSS rings are supported. - * When RSS is used, we'll need a default RXQ for non-IP traffic. + /* When the interface is not capable of RSS rings (and there is no + * need to create a default RXQ) we'll still need one RXQ */ - if (adapter->num_rx_qs > 1) - adapter->num_rx_qs++; + if (adapter->num_rx_qs == 0) + adapter->num_rx_qs = 1; adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE; for_all_rx_queues(adapter, rxo, i) { @@ -2438,8 +2549,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter) } dev_info(&adapter->pdev->dev, - "created %d RSS queue(s) and 1 default RX queue\n", - adapter->num_rx_qs - 1); + "created %d RX queue(s)\n", adapter->num_rx_qs); return 0; } @@ -2549,7 +2659,7 @@ loop_continue: return work_done; } -static inline void be_update_tx_err(struct be_tx_obj *txo, u32 status) +static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status) { switch (status) { case BE_TX_COMP_HDR_PARSE_ERR: @@ -2564,7 +2674,7 @@ static inline void be_update_tx_err(struct be_tx_obj *txo, u32 status) } } -static inline void lancer_update_tx_err(struct be_tx_obj *txo, u32 status) +static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status) { switch (status) { case LANCER_TX_COMP_LSO_ERR: @@ -2589,22 +2699,18 @@ static inline void lancer_update_tx_err(struct be_tx_obj *txo, u32 status) static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo, int idx) { - struct be_eth_tx_compl *txcp; int num_wrbs = 0, work_done = 0; - u32 compl_status; - u16 last_idx; + struct be_tx_compl_info *txcp; - while ((txcp = be_tx_compl_get(&txo->cq))) { - last_idx = GET_TX_COMPL_BITS(wrb_index, txcp); - num_wrbs += be_tx_compl_process(adapter, txo, last_idx); + while ((txcp = be_tx_compl_get(txo))) { + num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index); work_done++; - compl_status = GET_TX_COMPL_BITS(status, txcp); - if (compl_status) { + if (txcp->status) { if (lancer_chip(adapter)) - lancer_update_tx_err(txo, compl_status); + lancer_update_tx_err(txo, txcp->status); else - be_update_tx_err(txo, compl_status); + be_update_tx_err(txo, txcp->status); } } @@ -2615,7 +2721,7 @@ static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo, /* As Tx wrbs have been freed up, wake up netdev queue * if it was stopped due to lack of tx wrbs. */ if (__netif_subqueue_stopped(adapter->netdev, idx) && - atomic_read(&txo->q.used) < txo->q.len / 2) { + be_can_txq_wake(txo)) { netif_wake_subqueue(adapter->netdev, idx); } @@ -2807,12 +2913,12 @@ void be_detect_error(struct be_adapter *adapter) sliport_err2 = ioread32(adapter->db + SLIPORT_ERROR2_OFFSET); adapter->hw_error = true; + error_detected = true; /* Do not log error messages if its a FW reset */ if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 && sliport_err2 == SLIPORT_ERROR_FW_RESET2) { dev_info(dev, "Firmware update in progress\n"); } else { - error_detected = true; dev_err(dev, "Error detected in the card\n"); dev_err(dev, "ERR: sliport status 0x%x\n", sliport_status); @@ -2932,6 +3038,8 @@ static int be_msix_register(struct be_adapter *adapter) status = request_irq(vec, be_msix, 0, eqo->desc, eqo); if (status) goto err_msix; + + irq_set_affinity_hint(vec, eqo->affinity_mask); } return 0; @@ -2976,7 +3084,7 @@ static void be_irq_unregister(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct be_eq_obj *eqo; - int i; + int i, vec; if (!adapter->isr_registered) return; @@ -2988,8 +3096,11 @@ static void be_irq_unregister(struct be_adapter *adapter) } /* MSIx */ - for_all_evt_queues(adapter, eqo, i) - free_irq(be_msix_vec_get(adapter, eqo), eqo); + for_all_evt_queues(adapter, eqo, i) { + vec = be_msix_vec_get(adapter, eqo); + irq_set_affinity_hint(vec, NULL); + free_irq(vec, eqo); + } done: adapter->isr_registered = false; @@ -3071,12 +3182,14 @@ static int be_rx_qs_create(struct be_adapter *adapter) return rc; } - /* The FW would like the default RXQ to be created first */ - rxo = default_rxo(adapter); - rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, rx_frag_size, - adapter->if_handle, false, &rxo->rss_id); - if (rc) - return rc; + if (adapter->need_def_rxq || !adapter->num_rss_qs) { + rxo = default_rxo(adapter); + rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, + rx_frag_size, adapter->if_handle, + false, &rxo->rss_id); + if (rc) + return rc; + } for_all_rss_queues(adapter, rxo, i) { rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, @@ -3087,8 +3200,7 @@ static int be_rx_qs_create(struct be_adapter *adapter) } if (be_multi_rxq(adapter)) { - for (j = 0; j < RSS_INDIR_TABLE_LEN; - j += adapter->num_rx_qs - 1) { + for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) { for_all_rss_queues(adapter, rxo, i) { if ((j + i) >= RSS_INDIR_TABLE_LEN) break; @@ -3179,7 +3291,7 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable) int status = 0; u8 mac[ETH_ALEN]; - memset(mac, 0, ETH_ALEN); + eth_zero_addr(mac); cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config); cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, @@ -3324,6 +3436,14 @@ static void be_cancel_worker(struct be_adapter *adapter) } } +static void be_cancel_err_detection(struct be_adapter *adapter) +{ + if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) { + cancel_delayed_work_sync(&adapter->be_err_detection_work); + adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED; + } +} + static void be_mac_clear(struct be_adapter *adapter) { if (adapter->pmac_id) { @@ -3355,8 +3475,39 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter) } #endif +static u16 be_calculate_vf_qs(struct be_adapter *adapter, u16 num_vfs) +{ + struct be_resources res = adapter->pool_res; + u16 num_vf_qs = 1; + + /* Distribute the queue resources equally among the PF and it's VFs + * Do not distribute queue resources in multi-channel configuration. + */ + if (num_vfs && !be_is_mc(adapter)) { + /* If number of VFs requested is 8 less than max supported, + * assign 8 queue pairs to the PF and divide the remaining + * resources evenly among the VFs + */ + if (num_vfs < (be_max_vfs(adapter) - 8)) + num_vf_qs = (res.max_rss_qs - 8) / num_vfs; + else + num_vf_qs = res.max_rss_qs / num_vfs; + + /* Skyhawk-R chip supports only MAX_RSS_IFACES RSS capable + * interfaces per port. Provide RSS on VFs, only if number + * of VFs requested is less than MAX_RSS_IFACES limit. + */ + if (num_vfs >= MAX_RSS_IFACES) + num_vf_qs = 1; + } + return num_vf_qs; +} + static int be_clear(struct be_adapter *adapter) { + struct pci_dev *pdev = adapter->pdev; + u16 num_vf_qs; + be_cancel_worker(adapter); if (sriov_enabled(adapter)) @@ -3365,9 +3516,14 @@ static int be_clear(struct be_adapter *adapter) /* Re-configure FW to distribute resources evenly across max-supported * number of VFs, only when VFs are not already enabled. */ - if (be_physfn(adapter) && !pci_vfs_assigned(adapter->pdev)) + if (skyhawk_chip(adapter) && be_physfn(adapter) && + !pci_vfs_assigned(pdev)) { + num_vf_qs = be_calculate_vf_qs(adapter, + pci_sriov_get_totalvfs(pdev)); be_cmd_set_sriov_config(adapter, adapter->pool_res, - pci_sriov_get_totalvfs(adapter->pdev)); + pci_sriov_get_totalvfs(pdev), + num_vf_qs); + } #ifdef CONFIG_BE2NET_VXLAN be_disable_vxlan_offloads(adapter); @@ -3391,7 +3547,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | - BE_IF_FLAGS_RSS; + BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS; en_flags &= cap_flags; @@ -3412,6 +3568,7 @@ static int be_vfs_if_create(struct be_adapter *adapter) for_all_vfs(adapter, vf_cfg, vf) { if (!BE3_chip(adapter)) { status = be_cmd_get_profile_config(adapter, &res, + RESOURCE_LIMITS, vf + 1); if (!status) { cap_flags = res.if_cap_flags; @@ -3585,7 +3742,8 @@ static void BEx_get_resources(struct be_adapter *adapter, /* On a SuperNIC profile, the driver needs to use the * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits */ - be_cmd_get_profile_config(adapter, &super_nic_res, 0); + be_cmd_get_profile_config(adapter, &super_nic_res, + RESOURCE_LIMITS, 0); /* Some old versions of BE3 FW don't report max_tx_qs value */ res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS; } else { @@ -3605,6 +3763,7 @@ static void BEx_get_resources(struct be_adapter *adapter, res->max_evt_qs = 1; res->if_cap_flags = BE_IF_CAP_FLAGS_WANT; + res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS; if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS)) res->if_cap_flags &= ~BE_IF_FLAGS_RSS; } @@ -3624,13 +3783,12 @@ static void be_setup_init(struct be_adapter *adapter) static int be_get_sriov_config(struct be_adapter *adapter) { - struct device *dev = &adapter->pdev->dev; struct be_resources res = {0}; int max_vfs, old_vfs; - /* Some old versions of BE3 FW don't report max_vfs value */ - be_cmd_get_profile_config(adapter, &res, 0); + be_cmd_get_profile_config(adapter, &res, RESOURCE_LIMITS, 0); + /* Some old versions of BE3 FW don't report max_vfs value */ if (BE3_chip(adapter) && !res.max_vfs) { max_vfs = pci_sriov_get_totalvfs(adapter->pdev); res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0; @@ -3638,35 +3796,49 @@ static int be_get_sriov_config(struct be_adapter *adapter) adapter->pool_res = res; - if (!be_max_vfs(adapter)) { - if (num_vfs) - dev_warn(dev, "SRIOV is disabled. Ignoring num_vfs\n"); - adapter->num_vfs = 0; - return 0; - } - - pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter)); - - /* validate num_vfs module param */ + /* If during previous unload of the driver, the VFs were not disabled, + * then we cannot rely on the PF POOL limits for the TotalVFs value. + * Instead use the TotalVFs value stored in the pci-dev struct. + */ old_vfs = pci_num_vf(adapter->pdev); if (old_vfs) { - dev_info(dev, "%d VFs are already enabled\n", old_vfs); - if (old_vfs != num_vfs) - dev_warn(dev, "Ignoring num_vfs=%d setting\n", num_vfs); + dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n", + old_vfs); + + adapter->pool_res.max_vfs = + pci_sriov_get_totalvfs(adapter->pdev); adapter->num_vfs = old_vfs; - } else { - if (num_vfs > be_max_vfs(adapter)) { - dev_info(dev, "Resources unavailable to init %d VFs\n", - num_vfs); - dev_info(dev, "Limiting to %d VFs\n", - be_max_vfs(adapter)); - } - adapter->num_vfs = min_t(u16, num_vfs, be_max_vfs(adapter)); } return 0; } +static void be_alloc_sriov_res(struct be_adapter *adapter) +{ + int old_vfs = pci_num_vf(adapter->pdev); + u16 num_vf_qs; + int status; + + be_get_sriov_config(adapter); + + if (!old_vfs) + pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter)); + + /* When the HW is in SRIOV capable configuration, the PF-pool + * resources are given to PF during driver load, if there are no + * old VFs. This facility is not available in BE3 FW. + * Also, this is done by FW in Lancer chip. + */ + if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) { + num_vf_qs = be_calculate_vf_qs(adapter, 0); + status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0, + num_vf_qs); + if (status) + dev_err(&adapter->pdev->dev, + "Failed to optimize SRIOV resources\n"); + } +} + static int be_get_resources(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; @@ -3687,12 +3859,23 @@ static int be_get_resources(struct be_adapter *adapter) if (status) return status; + /* If a deafault RXQ must be created, we'll use up one RSSQ*/ + if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs && + !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)) + res.max_rss_qs -= 1; + /* If RoCE may be enabled stash away half the EQs for RoCE */ if (be_roce_supported(adapter)) res.max_evt_qs /= 2; adapter->res = res; } + /* If FW supports RSS default queue, then skip creating non-RSS + * queue for non-IP traffic. + */ + adapter->need_def_rxq = (be_if_cap_flags(adapter) & + BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1; + dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n", be_max_txqs(adapter), be_max_rxqs(adapter), be_max_rss(adapter), be_max_eqs(adapter), @@ -3701,47 +3884,33 @@ static int be_get_resources(struct be_adapter *adapter) be_max_uc(adapter), be_max_mc(adapter), be_max_vlans(adapter)); + /* Sanitize cfg_num_qs based on HW and platform limits */ + adapter->cfg_num_qs = min_t(u16, netif_get_num_default_rss_queues(), + be_max_qs(adapter)); return 0; } -static void be_sriov_config(struct be_adapter *adapter) -{ - struct device *dev = &adapter->pdev->dev; - int status; - - status = be_get_sriov_config(adapter); - if (status) { - dev_err(dev, "Failed to query SR-IOV configuration\n"); - dev_err(dev, "SR-IOV cannot be enabled\n"); - return; - } - - /* When the HW is in SRIOV capable configuration, the PF-pool - * resources are equally distributed across the max-number of - * VFs. The user may request only a subset of the max-vfs to be - * enabled. Based on num_vfs, redistribute the resources across - * num_vfs so that each VF will have access to more number of - * resources. This facility is not available in BE3 FW. - * Also, this is done by FW in Lancer chip. - */ - if (be_max_vfs(adapter) && !pci_num_vf(adapter->pdev)) { - status = be_cmd_set_sriov_config(adapter, - adapter->pool_res, - adapter->num_vfs); - if (status) - dev_err(dev, "Failed to optimize SR-IOV resources\n"); - } -} - static int be_get_config(struct be_adapter *adapter) { + int status, level; u16 profile_id; - int status; + + status = be_cmd_get_cntl_attributes(adapter); + if (status) + return status; status = be_cmd_query_fw_cfg(adapter); if (status) return status; + if (BEx_chip(adapter)) { + level = be_cmd_get_fw_log_level(adapter); + adapter->msg_enable = + level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0; + } + + be_cmd_get_acpi_wol_cap(adapter); + be_cmd_query_port_name(adapter); if (be_physfn(adapter)) { @@ -3751,9 +3920,6 @@ static int be_get_config(struct be_adapter *adapter) "Using profile 0x%x\n", profile_id); } - if (!BE2_chip(adapter) && be_physfn(adapter)) - be_sriov_config(adapter); - status = be_get_resources(adapter); if (status) return status; @@ -3763,9 +3929,6 @@ static int be_get_config(struct be_adapter *adapter) if (!adapter->pmac_id) return -ENOMEM; - /* Sanitize cfg_num_qs based on HW and platform limits */ - adapter->cfg_num_qs = min(adapter->cfg_num_qs, be_max_qs(adapter)); - return 0; } @@ -3799,6 +3962,13 @@ static void be_schedule_worker(struct be_adapter *adapter) adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; } +static void be_schedule_err_detection(struct be_adapter *adapter) +{ + schedule_delayed_work(&adapter->be_err_detection_work, + msecs_to_jiffies(1000)); + adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED; +} + static int be_setup_queues(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -3881,16 +4051,61 @@ static inline int fw_major_num(const char *fw_ver) return fw_major; } +/* If any VFs are already enabled don't FLR the PF */ +static bool be_reset_required(struct be_adapter *adapter) +{ + return pci_num_vf(adapter->pdev) ? false : true; +} + +/* Wait for the FW to be ready and perform the required initialization */ +static int be_func_init(struct be_adapter *adapter) +{ + int status; + + status = be_fw_wait_ready(adapter); + if (status) + return status; + + if (be_reset_required(adapter)) { + status = be_cmd_reset_function(adapter); + if (status) + return status; + + /* Wait for interrupts to quiesce after an FLR */ + msleep(100); + + /* We can clear all errors when function reset succeeds */ + be_clear_all_error(adapter); + } + + /* Tell FW we're ready to fire cmds */ + status = be_cmd_fw_init(adapter); + if (status) + return status; + + /* Allow interrupts for other ULPs running on NIC function */ + be_intr_set(adapter, true); + + return 0; +} + static int be_setup(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; int status; + status = be_func_init(adapter); + if (status) + return status; + be_setup_init(adapter); if (!lancer_chip(adapter)) be_cmd_req_native_mode(adapter); + if (!BE2_chip(adapter) && be_physfn(adapter)) + be_alloc_sriov_res(adapter); + status = be_get_config(adapter); if (status) goto err; @@ -3931,8 +4146,6 @@ static int be_setup(struct be_adapter *adapter) be_set_rx_mode(adapter->netdev); - be_cmd_get_acpi_wol_cap(adapter); - status = be_cmd_set_flow_control(adapter, adapter->tx_fc, adapter->rx_fc); if (status) @@ -4842,6 +5055,142 @@ static void be_netdev_init(struct net_device *netdev) netdev->ethtool_ops = &be_ethtool_ops; } +static void be_cleanup(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + rtnl_lock(); + netif_device_detach(netdev); + if (netif_running(netdev)) + be_close(netdev); + rtnl_unlock(); + + be_clear(adapter); +} + +static int be_resume(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int status; + + status = be_setup(adapter); + if (status) + return status; + + if (netif_running(netdev)) { + status = be_open(netdev); + if (status) + return status; + } + + netif_device_attach(netdev); + + return 0; +} + +static int be_err_recover(struct be_adapter *adapter) +{ + struct device *dev = &adapter->pdev->dev; + int status; + + status = be_resume(adapter); + if (status) + goto err; + + dev_info(dev, "Adapter recovery successful\n"); + return 0; +err: + if (be_physfn(adapter)) + dev_err(dev, "Adapter recovery failed\n"); + else + dev_err(dev, "Re-trying adapter recovery\n"); + + return status; +} + +static void be_err_detection_task(struct work_struct *work) +{ + struct be_adapter *adapter = + container_of(work, struct be_adapter, + be_err_detection_work.work); + int status = 0; + + be_detect_error(adapter); + + if (adapter->hw_error) { + be_cleanup(adapter); + + /* As of now error recovery support is in Lancer only */ + if (lancer_chip(adapter)) + status = be_err_recover(adapter); + } + + /* Always attempt recovery on VFs */ + if (!status || be_virtfn(adapter)) + be_schedule_err_detection(adapter); +} + +static void be_log_sfp_info(struct be_adapter *adapter) +{ + int status; + + status = be_cmd_query_sfp_info(adapter); + if (!status) { + dev_err(&adapter->pdev->dev, + "Unqualified SFP+ detected on %c from %s part no: %s", + adapter->port_name, adapter->phy.vendor_name, + adapter->phy.vendor_pn); + } + adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP; +} + +static void be_worker(struct work_struct *work) +{ + struct be_adapter *adapter = + container_of(work, struct be_adapter, work.work); + struct be_rx_obj *rxo; + int i; + + /* when interrupts are not yet enabled, just reap any pending + * mcc completions + */ + if (!netif_running(adapter->netdev)) { + local_bh_disable(); + be_process_mcc(adapter); + local_bh_enable(); + goto reschedule; + } + + if (!adapter->stats_cmd_sent) { + if (lancer_chip(adapter)) + lancer_cmd_get_pport_stats(adapter, + &adapter->stats_cmd); + else + be_cmd_get_stats(adapter, &adapter->stats_cmd); + } + + if (be_physfn(adapter) && + MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0) + be_cmd_get_die_temperature(adapter); + + for_all_rx_queues(adapter, rxo, i) { + /* Replenish RX-queues starved due to memory + * allocation failures. + */ + if (rxo->rx_post_starved) + be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); + } + + be_eqd_update(adapter); + + if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) + be_log_sfp_info(adapter); + +reschedule: + adapter->work_counter++; + schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); +} + static void be_unmap_pci_bars(struct be_adapter *adapter) { if (adapter->csr) @@ -4874,6 +5223,12 @@ static int be_map_pci_bars(struct be_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; u8 __iomem *addr; + u32 sli_intf; + + pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); + adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >> + SLI_INTF_FAMILY_SHIFT; + adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; if (BEx_chip(adapter) && be_physfn(adapter)) { adapter->csr = pci_iomap(pdev, 2, 0); @@ -4907,109 +5262,93 @@ pci_map_err: return -ENOMEM; } -static void be_ctrl_cleanup(struct be_adapter *adapter) +static void be_drv_cleanup(struct be_adapter *adapter) { struct be_dma_mem *mem = &adapter->mbox_mem_alloced; - - be_unmap_pci_bars(adapter); + struct device *dev = &adapter->pdev->dev; if (mem->va) - dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, - mem->dma); + dma_free_coherent(dev, mem->size, mem->va, mem->dma); mem = &adapter->rx_filter; if (mem->va) - dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, - mem->dma); + dma_free_coherent(dev, mem->size, mem->va, mem->dma); + + mem = &adapter->stats_cmd; + if (mem->va) + dma_free_coherent(dev, mem->size, mem->va, mem->dma); } -static int be_ctrl_init(struct be_adapter *adapter) +/* Allocate and initialize various fields in be_adapter struct */ +static int be_drv_init(struct be_adapter *adapter) { struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced; struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem; struct be_dma_mem *rx_filter = &adapter->rx_filter; - u32 sli_intf; - int status; - - pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); - adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >> - SLI_INTF_FAMILY_SHIFT; - adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; - - status = be_map_pci_bars(adapter); - if (status) - goto done; + struct be_dma_mem *stats_cmd = &adapter->stats_cmd; + struct device *dev = &adapter->pdev->dev; + int status = 0; mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16; - mbox_mem_alloc->va = dma_alloc_coherent(&adapter->pdev->dev, - mbox_mem_alloc->size, + mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size, &mbox_mem_alloc->dma, GFP_KERNEL); - if (!mbox_mem_alloc->va) { - status = -ENOMEM; - goto unmap_pci_bars; - } + if (!mbox_mem_alloc->va) + return -ENOMEM; + mbox_mem_align->size = sizeof(struct be_mcc_mailbox); mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16); mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16); memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox)); rx_filter->size = sizeof(struct be_cmd_req_rx_filter); - rx_filter->va = dma_zalloc_coherent(&adapter->pdev->dev, - rx_filter->size, &rx_filter->dma, - GFP_KERNEL); + rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size, + &rx_filter->dma, GFP_KERNEL); if (!rx_filter->va) { status = -ENOMEM; goto free_mbox; } + if (lancer_chip(adapter)) + stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats); + else if (BE2_chip(adapter)) + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0); + else if (BE3_chip(adapter)) + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1); + else + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2); + stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size, + &stats_cmd->dma, GFP_KERNEL); + if (!stats_cmd->va) { + status = -ENOMEM; + goto free_rx_filter; + } + mutex_init(&adapter->mbox_lock); spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); - init_completion(&adapter->et_cmd_compl); - pci_save_state(adapter->pdev); - return 0; -free_mbox: - dma_free_coherent(&adapter->pdev->dev, mbox_mem_alloc->size, - mbox_mem_alloc->va, mbox_mem_alloc->dma); - -unmap_pci_bars: - be_unmap_pci_bars(adapter); - -done: - return status; -} - -static void be_stats_cleanup(struct be_adapter *adapter) -{ - struct be_dma_mem *cmd = &adapter->stats_cmd; + pci_save_state(adapter->pdev); - if (cmd->va) - dma_free_coherent(&adapter->pdev->dev, cmd->size, - cmd->va, cmd->dma); -} + INIT_DELAYED_WORK(&adapter->work, be_worker); + INIT_DELAYED_WORK(&adapter->be_err_detection_work, + be_err_detection_task); -static int be_stats_init(struct be_adapter *adapter) -{ - struct be_dma_mem *cmd = &adapter->stats_cmd; + adapter->rx_fc = true; + adapter->tx_fc = true; - if (lancer_chip(adapter)) - cmd->size = sizeof(struct lancer_cmd_req_pport_stats); - else if (BE2_chip(adapter)) - cmd->size = sizeof(struct be_cmd_req_get_stats_v0); - else if (BE3_chip(adapter)) - cmd->size = sizeof(struct be_cmd_req_get_stats_v1); - else - /* ALL non-BE ASICs */ - cmd->size = sizeof(struct be_cmd_req_get_stats_v2); + /* Must be a power of 2 or else MODULO will BUG_ON */ + adapter->be_get_temp_freq = 64; - cmd->va = dma_zalloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma, - GFP_KERNEL); - if (!cmd->va) - return -ENOMEM; return 0; + +free_rx_filter: + dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma); +free_mbox: + dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va, + mbox_mem_alloc->dma); + return status; } static void be_remove(struct pci_dev *pdev) @@ -5022,7 +5361,7 @@ static void be_remove(struct pci_dev *pdev) be_roce_dev_remove(adapter); be_intr_set(adapter, false); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); unregister_netdev(adapter->netdev); @@ -5031,9 +5370,8 @@ static void be_remove(struct pci_dev *pdev) /* tell fw we're done with firing cmds */ be_cmd_fw_clean(adapter); - be_stats_cleanup(adapter); - - be_ctrl_cleanup(adapter); + be_unmap_pci_bars(adapter); + be_drv_cleanup(adapter); pci_disable_pcie_error_reporting(pdev); @@ -5043,156 +5381,6 @@ static void be_remove(struct pci_dev *pdev) free_netdev(adapter->netdev); } -static int be_get_initial_config(struct be_adapter *adapter) -{ - int status, level; - - status = be_cmd_get_cntl_attributes(adapter); - if (status) - return status; - - /* Must be a power of 2 or else MODULO will BUG_ON */ - adapter->be_get_temp_freq = 64; - - if (BEx_chip(adapter)) { - level = be_cmd_get_fw_log_level(adapter); - adapter->msg_enable = - level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0; - } - - adapter->cfg_num_qs = netif_get_num_default_rss_queues(); - return 0; -} - -static int lancer_recover_func(struct be_adapter *adapter) -{ - struct device *dev = &adapter->pdev->dev; - int status; - - status = lancer_test_and_set_rdy_state(adapter); - if (status) - goto err; - - if (netif_running(adapter->netdev)) - be_close(adapter->netdev); - - be_clear(adapter); - - be_clear_all_error(adapter); - - status = be_setup(adapter); - if (status) - goto err; - - if (netif_running(adapter->netdev)) { - status = be_open(adapter->netdev); - if (status) - goto err; - } - - dev_err(dev, "Adapter recovery successful\n"); - return 0; -err: - if (status == -EAGAIN) - dev_err(dev, "Waiting for resource provisioning\n"); - else - dev_err(dev, "Adapter recovery failed\n"); - - return status; -} - -static void be_func_recovery_task(struct work_struct *work) -{ - struct be_adapter *adapter = - container_of(work, struct be_adapter, func_recovery_work.work); - int status = 0; - - be_detect_error(adapter); - - if (adapter->hw_error && lancer_chip(adapter)) { - rtnl_lock(); - netif_device_detach(adapter->netdev); - rtnl_unlock(); - - status = lancer_recover_func(adapter); - if (!status) - netif_device_attach(adapter->netdev); - } - - /* In Lancer, for all errors other than provisioning error (-EAGAIN), - * no need to attempt further recovery. - */ - if (!status || status == -EAGAIN) - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); -} - -static void be_log_sfp_info(struct be_adapter *adapter) -{ - int status; - - status = be_cmd_query_sfp_info(adapter); - if (!status) { - dev_err(&adapter->pdev->dev, - "Unqualified SFP+ detected on %c from %s part no: %s", - adapter->port_name, adapter->phy.vendor_name, - adapter->phy.vendor_pn); - } - adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP; -} - -static void be_worker(struct work_struct *work) -{ - struct be_adapter *adapter = - container_of(work, struct be_adapter, work.work); - struct be_rx_obj *rxo; - int i; - - /* when interrupts are not yet enabled, just reap any pending - * mcc completions */ - if (!netif_running(adapter->netdev)) { - local_bh_disable(); - be_process_mcc(adapter); - local_bh_enable(); - goto reschedule; - } - - if (!adapter->stats_cmd_sent) { - if (lancer_chip(adapter)) - lancer_cmd_get_pport_stats(adapter, - &adapter->stats_cmd); - else - be_cmd_get_stats(adapter, &adapter->stats_cmd); - } - - if (be_physfn(adapter) && - MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0) - be_cmd_get_die_temperature(adapter); - - for_all_rx_queues(adapter, rxo, i) { - /* Replenish RX-queues starved due to memory - * allocation failures. - */ - if (rxo->rx_post_starved) - be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); - } - - be_eqd_update(adapter); - - if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) - be_log_sfp_info(adapter); - -reschedule: - adapter->work_counter++; - schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); -} - -/* If any VFs are already enabled don't FLR the PF */ -static bool be_reset_required(struct be_adapter *adapter) -{ - return pci_num_vf(adapter->pdev) ? false : true; -} - static char *mc_name(struct be_adapter *adapter) { char *str = ""; /* default */ @@ -5291,50 +5479,17 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) if (!status) dev_info(&pdev->dev, "PCIe error reporting enabled\n"); - status = be_ctrl_init(adapter); + status = be_map_pci_bars(adapter); if (status) goto free_netdev; - /* sync up with fw's ready state */ - if (be_physfn(adapter)) { - status = be_fw_wait_ready(adapter); - if (status) - goto ctrl_clean; - } - - if (be_reset_required(adapter)) { - status = be_cmd_reset_function(adapter); - if (status) - goto ctrl_clean; - - /* Wait for interrupts to quiesce after an FLR */ - msleep(100); - } - - /* Allow interrupts for other ULPs running on NIC function */ - be_intr_set(adapter, true); - - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); - if (status) - goto ctrl_clean; - - status = be_stats_init(adapter); - if (status) - goto ctrl_clean; - - status = be_get_initial_config(adapter); + status = be_drv_init(adapter); if (status) - goto stats_clean; - - INIT_DELAYED_WORK(&adapter->work, be_worker); - INIT_DELAYED_WORK(&adapter->func_recovery_work, be_func_recovery_task); - adapter->rx_fc = true; - adapter->tx_fc = true; + goto unmap_bars; status = be_setup(adapter); if (status) - goto stats_clean; + goto drv_cleanup; be_netdev_init(netdev); status = register_netdev(netdev); @@ -5343,8 +5498,7 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) be_roce_dev_add(adapter); - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); + be_schedule_err_detection(adapter); dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev), func_name(adapter), mc_name(adapter), adapter->port_name); @@ -5353,10 +5507,10 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) unsetup: be_clear(adapter); -stats_clean: - be_stats_cleanup(adapter); -ctrl_clean: - be_ctrl_cleanup(adapter); +drv_cleanup: + be_drv_cleanup(adapter); +unmap_bars: + be_unmap_pci_bars(adapter); free_netdev: free_netdev(netdev); rel_reg: @@ -5371,21 +5525,14 @@ do_none: static int be_suspend(struct pci_dev *pdev, pm_message_t state) { struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; if (adapter->wol_en) be_setup_wol(adapter, true); be_intr_set(adapter, false); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); - netif_device_detach(netdev); - if (netif_running(netdev)) { - rtnl_lock(); - be_close(netdev); - rtnl_unlock(); - } - be_clear(adapter); + be_cleanup(adapter); pci_save_state(pdev); pci_disable_device(pdev); @@ -5393,13 +5540,10 @@ static int be_suspend(struct pci_dev *pdev, pm_message_t state) return 0; } -static int be_resume(struct pci_dev *pdev) +static int be_pci_resume(struct pci_dev *pdev) { - int status = 0; struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; - - netif_device_detach(netdev); + int status = 0; status = pci_enable_device(pdev); if (status) @@ -5408,30 +5552,11 @@ static int be_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - status = be_fw_wait_ready(adapter); - if (status) - return status; - - status = be_cmd_reset_function(adapter); - if (status) - return status; - - be_intr_set(adapter, true); - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); + status = be_resume(adapter); if (status) return status; - be_setup(adapter); - if (netif_running(netdev)) { - rtnl_lock(); - be_open(netdev); - rtnl_unlock(); - } - - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); - netif_device_attach(netdev); + be_schedule_err_detection(adapter); if (adapter->wol_en) be_setup_wol(adapter, false); @@ -5451,7 +5576,7 @@ static void be_shutdown(struct pci_dev *pdev) be_roce_dev_shutdown(adapter); cancel_delayed_work_sync(&adapter->work); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); netif_device_detach(adapter->netdev); @@ -5464,22 +5589,15 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; dev_err(&adapter->pdev->dev, "EEH error detected\n"); if (!adapter->eeh_error) { adapter->eeh_error = true; - cancel_delayed_work_sync(&adapter->func_recovery_work); - - rtnl_lock(); - netif_device_detach(netdev); - if (netif_running(netdev)) - be_close(netdev); - rtnl_unlock(); + be_cancel_err_detection(adapter); - be_clear(adapter); + be_cleanup(adapter); } if (state == pci_channel_io_perm_failure) @@ -5530,43 +5648,73 @@ static void be_eeh_resume(struct pci_dev *pdev) { int status = 0; struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; dev_info(&adapter->pdev->dev, "EEH resume\n"); pci_save_state(pdev); - status = be_cmd_reset_function(adapter); + status = be_resume(adapter); if (status) goto err; - /* On some BE3 FW versions, after a HW reset, - * interrupts will remain disabled for each function. - * So, explicitly enable interrupts + be_schedule_err_detection(adapter); + return; +err: + dev_err(&adapter->pdev->dev, "EEH resume failed\n"); +} + +static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct be_adapter *adapter = pci_get_drvdata(pdev); + u16 num_vf_qs; + int status; + + if (!num_vfs) + be_vf_clear(adapter); + + adapter->num_vfs = num_vfs; + + if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, + "Cannot disable VFs while they are assigned\n"); + return -EBUSY; + } + + /* When the HW is in SRIOV capable configuration, the PF-pool resources + * are equally distributed across the max-number of VFs. The user may + * request only a subset of the max-vfs to be enabled. + * Based on num_vfs, redistribute the resources across num_vfs so that + * each VF will have access to more number of resources. + * This facility is not available in BE3 FW. + * Also, this is done by FW in Lancer chip. */ - be_intr_set(adapter, true); + if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) { + num_vf_qs = be_calculate_vf_qs(adapter, adapter->num_vfs); + status = be_cmd_set_sriov_config(adapter, adapter->pool_res, + adapter->num_vfs, num_vf_qs); + if (status) + dev_err(&pdev->dev, + "Failed to optimize SR-IOV resources\n"); + } - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); + status = be_get_resources(adapter); if (status) - goto err; + return be_cmd_status(status); - status = be_setup(adapter); + /* Updating real_num_tx/rx_queues() requires rtnl_lock() */ + rtnl_lock(); + status = be_update_queues(adapter); + rtnl_unlock(); if (status) - goto err; + return be_cmd_status(status); - if (netif_running(netdev)) { - status = be_open(netdev); - if (status) - goto err; - } + if (adapter->num_vfs) + status = be_vf_setup(adapter); - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); - netif_device_attach(netdev); - return; -err: - dev_err(&adapter->pdev->dev, "EEH resume failed\n"); + if (!status) + return adapter->num_vfs; + + return 0; } static const struct pci_error_handlers be_eeh_handlers = { @@ -5581,8 +5729,9 @@ static struct pci_driver be_driver = { .probe = be_probe, .remove = be_remove, .suspend = be_suspend, - .resume = be_resume, + .resume = be_pci_resume, .shutdown = be_shutdown, + .sriov_configure = be_pci_sriov_configure, .err_handler = &be_eeh_handlers }; @@ -5596,6 +5745,11 @@ static int __init be_init_module(void) rx_frag_size = 2048; } + if (num_vfs > 0) { + pr_info(DRV_NAME " : Module param num_vfs is obsolete."); + pr_info(DRV_NAME " : Use sysfs method to enable VFs\n"); + } + return pci_register_driver(&be_driver); } module_init(be_init_module); |