summary | refs | log | tree | commit | diff
path: root/drivers/net/ethernet/intel/ice/ice_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_main.c')
-rw-r--r-- drivers/net/ethernet/intel/ice/ice_main.c 237
1 files changed, 170 insertions, 67 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 93979ab18bc1..c8286adae946 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -64,6 +64,7 @@ struct device *ice_hw_to_dev(struct ice_hw *hw)
}
static struct workqueue_struct *ice_wq;
+struct workqueue_struct *ice_lag_wq;
static const struct net_device_ops ice_netdev_safe_mode_ops;
static const struct net_device_ops ice_netdev_ops;
@@ -80,7 +81,7 @@ ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
void *data,
void (*cleanup)(struct flow_block_cb *block_cb));
-bool netif_is_ice(struct net_device *dev)
+bool netif_is_ice(const struct net_device *dev)
{
return dev && (dev->netdev_ops == &ice_netdev_ops);
}
@@ -635,6 +636,11 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
+ if (pf->lag && pf->lag->bonded && reset_type == ICE_RESET_PFR) {
+ dev_dbg(dev, "PFR on a bonded interface, promoting to CORER\n");
+ reset_type = ICE_RESET_CORER;
+ }
+
ice_prepare_for_reset(pf, reset_type);
/* trigger the reset */
@@ -718,8 +724,13 @@ static void ice_reset_subtask(struct ice_pf *pf)
}
/* No pending resets to finish processing. Check for new resets */
- if (test_bit(ICE_PFR_REQ, pf->state))
+ if (test_bit(ICE_PFR_REQ, pf->state)) {
reset_type = ICE_RESET_PFR;
+ if (pf->lag && pf->lag->bonded) {
+ dev_dbg(ice_pf_to_dev(pf), "PFR on a bonded interface, promoting to CORER\n");
+ reset_type = ICE_RESET_CORER;
+ }
+ }
if (test_bit(ICE_CORER_REQ, pf->state))
reset_type = ICE_RESET_CORER;
if (test_bit(ICE_GLOBR_REQ, pf->state))
@@ -1239,64 +1250,63 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
return status;
}
-enum ice_aq_task_state {
- ICE_AQ_TASK_WAITING = 0,
- ICE_AQ_TASK_COMPLETE,
- ICE_AQ_TASK_CANCELED,
-};
-
-struct ice_aq_task {
- struct hlist_node entry;
+/**
+ * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
+ * @pf: pointer to the PF private structure
+ * @task: intermediate helper storage and identifier for waiting
+ * @opcode: the opcode to wait for
+ *
+ * Prepares to wait for a specific AdminQ completion event on the ARQ for
+ * a given PF. Actual wait would be done by a call to ice_aq_wait_for_event().
+ *
+ * Calls are separated so the caller can register for the event before sending
+ * the command, which mitigates a race between registering and FW responding.
+ *
+ * To obtain only the descriptor contents, pass a task->event with null
+ * msg_buf. If the complete data buffer is desired, allocate the
+ * task->event.msg_buf with enough space ahead of time.
+ */
+void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+ u16 opcode)
+{
+ INIT_HLIST_NODE(&task->entry);
+ task->opcode = opcode;
+ task->state = ICE_AQ_TASK_WAITING;
- u16 opcode;
- struct ice_rq_event_info *event;
- enum ice_aq_task_state state;
-};
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_add_head(&task->entry, &pf->aq_wait_list);
+ spin_unlock_bh(&pf->aq_wait_lock);
+}
/**
* ice_aq_wait_for_event - Wait for an AdminQ event from firmware
* @pf: pointer to the PF private structure
- * @opcode: the opcode to wait for
+ * @task: ptr prepared by ice_aq_prep_for_event()
* @timeout: how long to wait, in jiffies
- * @event: storage for the event info
*
* Waits for a specific AdminQ completion event on the ARQ for a given PF. The
* current thread will be put to sleep until the specified event occurs or
* until the given timeout is reached.
*
- * To obtain only the descriptor contents, pass an event without an allocated
- * msg_buf. If the complete data buffer is desired, allocate the
- * event->msg_buf with enough space ahead of time.
- *
* Returns: zero on success, or a negative error code on failure.
*/
-int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
- struct ice_rq_event_info *event)
+int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+ unsigned long timeout)
{
+ enum ice_aq_task_state *state = &task->state;
struct device *dev = ice_pf_to_dev(pf);
- struct ice_aq_task *task;
- unsigned long start;
+ unsigned long start = jiffies;
long ret;
int err;
- task = kzalloc(sizeof(*task), GFP_KERNEL);
- if (!task)
- return -ENOMEM;
-
- INIT_HLIST_NODE(&task->entry);
- task->opcode = opcode;
- task->event = event;
- task->state = ICE_AQ_TASK_WAITING;
-
- spin_lock_bh(&pf->aq_wait_lock);
- hlist_add_head(&task->entry, &pf->aq_wait_list);
- spin_unlock_bh(&pf->aq_wait_lock);
-
- start = jiffies;
-
- ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
+ ret = wait_event_interruptible_timeout(pf->aq_wait_queue,
+ *state != ICE_AQ_TASK_WAITING,
timeout);
- switch (task->state) {
+ switch (*state) {
+ case ICE_AQ_TASK_NOT_PREPARED:
+ WARN(1, "call to %s without ice_aq_prep_for_event()", __func__);
+ err = -EINVAL;
+ break;
case ICE_AQ_TASK_WAITING:
err = ret < 0 ? ret : -ETIMEDOUT;
break;
@@ -1307,7 +1317,7 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
err = ret < 0 ? ret : 0;
break;
default:
- WARN(1, "Unexpected AdminQ wait task state %u", task->state);
+ WARN(1, "Unexpected AdminQ wait task state %u", *state);
err = -EINVAL;
break;
}
@@ -1315,12 +1325,11 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
jiffies_to_msecs(jiffies - start),
jiffies_to_msecs(timeout),
- opcode);
+ task->opcode);
spin_lock_bh(&pf->aq_wait_lock);
hlist_del(&task->entry);
spin_unlock_bh(&pf->aq_wait_lock);
- kfree(task);
return err;
}
@@ -1346,23 +1355,26 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
struct ice_rq_event_info *event)
{
+ struct ice_rq_event_info *task_ev;
struct ice_aq_task *task;
bool found = false;
spin_lock_bh(&pf->aq_wait_lock);
hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
- if (task->state || task->opcode != opcode)
+ if (task->state != ICE_AQ_TASK_WAITING)
+ continue;
+ if (task->opcode != opcode)
continue;
- memcpy(&task->event->desc, &event->desc, sizeof(event->desc));
- task->event->msg_len = event->msg_len;
+ task_ev = &task->event;
+ memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
+ task_ev->msg_len = event->msg_len;
/* Only copy the data buffer if a destination was set */
- if (task->event->msg_buf &&
- task->event->buf_len > event->buf_len) {
- memcpy(task->event->msg_buf, event->msg_buf,
+ if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
+ memcpy(task_ev->msg_buf, event->msg_buf,
event->buf_len);
- task->event->buf_len = event->buf_len;
+ task_ev->buf_len = event->buf_len;
}
task->state = ICE_AQ_TASK_COMPLETE;
@@ -3392,6 +3404,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY |
NETDEV_XDP_ACT_RX_SG;
+ netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
}
/**
@@ -3794,6 +3807,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf)
static void ice_deinit_pf(struct ice_pf *pf)
{
ice_service_task_stop(pf);
+ mutex_destroy(&pf->lag_mutex);
mutex_destroy(&pf->adev_mutex);
mutex_destroy(&pf->sw_mutex);
mutex_destroy(&pf->tc_mutex);
@@ -3874,6 +3888,7 @@ static int ice_init_pf(struct ice_pf *pf)
mutex_init(&pf->sw_mutex);
mutex_init(&pf->tc_mutex);
mutex_init(&pf->adev_mutex);
+ mutex_init(&pf->lag_mutex);
INIT_HLIST_HEAD(&pf->aq_wait_list);
spin_lock_init(&pf->aq_wait_lock);
@@ -4430,9 +4445,9 @@ static int ice_start_eth(struct ice_vsi *vsi)
if (err)
return err;
- rtnl_lock();
err = ice_vsi_open(vsi);
- rtnl_unlock();
+ if (err)
+ ice_fltr_remove_all(vsi);
return err;
}
@@ -4506,6 +4521,31 @@ static void ice_deinit_eth(struct ice_pf *pf)
ice_decfg_netdev(vsi);
}
+/**
+ * ice_wait_for_fw - wait until the FW loading bit in GL_MNG_FWSM clears
+ * @hw: pointer to the hardware structure
+ * @timeout: maximum time to wait, in milliseconds (polled in 100 ms steps)
+ */
+static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout)
+{
+ int fw_loading;
+ u32 elapsed = 0;
+
+ while (elapsed <= timeout) {
+ fw_loading = rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M;
+
+ /* FW is still loading: sleep 100 ms and poll again */
+ if (fw_loading) {
+ elapsed += 100;
+ msleep(100);
+ continue;
+ }
+ return 0;
+ }
+
+ return -ETIMEDOUT;
+}
+
static int ice_init_dev(struct ice_pf *pf)
{
struct device *dev = ice_pf_to_dev(pf);
@@ -4518,6 +4558,18 @@ static int ice_init_dev(struct ice_pf *pf)
return err;
}
+ /* Some cards require longer initialization times
+ * due to the necessity of loading FW from an external source.
+ * This can take as long as half a minute.
+ */
+ if (ice_is_pf_c827(hw)) {
+ err = ice_wait_for_fw(hw, 30000);
+ if (err) {
+ dev_err(dev, "ice_wait_for_fw timed out");
+ return err;
+ }
+ }
+
ice_init_feature_support(pf);
ice_request_fw(pf);
@@ -4895,6 +4947,7 @@ int ice_load(struct ice_pf *pf)
params = ice_vsi_to_params(vsi);
params.flags = ICE_VSI_FLAG_INIT;
+ rtnl_lock();
err = ice_vsi_cfg(vsi, &params);
if (err)
goto err_vsi_cfg;
@@ -4902,6 +4955,7 @@ int ice_load(struct ice_pf *pf)
err = ice_start_eth(ice_get_main_vsi(pf));
if (err)
goto err_start_eth;
+ rtnl_unlock();
err = ice_init_rdma(pf);
if (err)
@@ -4916,9 +4970,11 @@ int ice_load(struct ice_pf *pf)
err_init_rdma:
ice_vsi_close(ice_get_main_vsi(pf));
+ rtnl_lock();
err_start_eth:
ice_vsi_decfg(ice_get_main_vsi(pf));
err_vsi_cfg:
+ rtnl_unlock();
ice_deinit_dev(pf);
return err;
}
@@ -4931,8 +4987,10 @@ void ice_unload(struct ice_pf *pf)
{
ice_deinit_features(pf);
ice_deinit_rdma(pf);
+ rtnl_lock();
ice_stop_eth(ice_get_main_vsi(pf));
ice_vsi_decfg(ice_get_main_vsi(pf));
+ rtnl_unlock();
ice_deinit_dev(pf);
}
@@ -5564,7 +5622,7 @@ static struct pci_driver ice_driver = {
*/
static int __init ice_module_init(void)
{
- int status;
+ int status = -ENOMEM;
pr_info("%s\n", ice_driver_string);
pr_info("%s\n", ice_copyright);
@@ -5572,15 +5630,27 @@ static int __init ice_module_init(void)
ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME);
if (!ice_wq) {
pr_err("Failed to create workqueue\n");
- return -ENOMEM;
+ return status;
+ }
+
+ ice_lag_wq = alloc_ordered_workqueue("ice_lag_wq", 0);
+ if (!ice_lag_wq) {
+ pr_err("Failed to create LAG workqueue\n");
+ goto err_dest_wq;
}
status = pci_register_driver(&ice_driver);
if (status) {
pr_err("failed to register PCI driver, err %d\n", status);
- destroy_workqueue(ice_wq);
+ goto err_dest_lag_wq;
}
+ return 0;
+
+err_dest_lag_wq:
+ destroy_workqueue(ice_lag_wq);
+err_dest_wq:
+ destroy_workqueue(ice_wq);
return status;
}
module_init(ice_module_init);
@@ -5595,6 +5665,7 @@ static void __exit ice_module_exit(void)
{
pci_unregister_driver(&ice_driver);
destroy_workqueue(ice_wq);
+ destroy_workqueue(ice_lag_wq);
pr_info("module unloaded\n");
}
module_exit(ice_module_exit);
@@ -5697,7 +5768,7 @@ static void ice_set_rx_mode(struct net_device *netdev)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
- if (!vsi)
+ if (!vsi || ice_is_switchdev_running(vsi->back))
return;
/* Set the flags to synchronize filters
@@ -5739,6 +5810,13 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
q_handle = vsi->tx_rings[queue_index]->q_handle;
tc = ice_dcb_get_tc(vsi, queue_index);
+ vsi = ice_locate_vsi_using_queue(vsi, queue_index);
+ if (!vsi) {
+ netdev_err(netdev, "Invalid VSI for given queue %d\n",
+ queue_index);
+ return -EINVAL;
+ }
+
/* Set BW back to default, when user set maxrate to 0 */
if (!maxrate)
status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
@@ -6242,7 +6320,7 @@ static void ice_tx_dim_work(struct work_struct *work)
u16 itr;
dim = container_of(work, struct dim, work);
- rc = (struct ice_ring_container *)dim->priv;
+ rc = dim->priv;
WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
@@ -6262,7 +6340,7 @@ static void ice_rx_dim_work(struct work_struct *work)
u16 itr;
dim = container_of(work, struct dim, work);
- rc = (struct ice_ring_container *)dim->priv;
+ rc = dim->priv;
WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
@@ -7343,6 +7421,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
clear_bit(ICE_RESET_FAILED, pf->state);
ice_plug_aux_dev(pf);
+ if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
+ ice_lag_rebuild(pf);
return;
err_vsi_rebuild:
@@ -7872,10 +7952,10 @@ static int
ice_validate_mqprio_qopt(struct ice_vsi *vsi,
struct tc_mqprio_qopt_offload *mqprio_qopt)
{
- u64 sum_max_rate = 0, sum_min_rate = 0;
int non_power_of_2_qcount = 0;
struct ice_pf *pf = vsi->back;
int max_rss_q_cnt = 0;
+ u64 sum_min_rate = 0;
struct device *dev;
int i, speed;
u8 num_tc;
@@ -7891,6 +7971,7 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
dev = ice_pf_to_dev(pf);
vsi->ch_rss_size = 0;
num_tc = mqprio_qopt->qopt.num_tc;
+ speed = ice_get_link_speed_kbps(vsi);
for (i = 0; num_tc; i++) {
int qcount = mqprio_qopt->qopt.count[i];
@@ -7931,7 +8012,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
*/
max_rate = mqprio_qopt->max_rate[i];
max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
- sum_max_rate += max_rate;
/* min_rate is minimum guaranteed rate and it can't be zero */
min_rate = mqprio_qopt->min_rate[i];
@@ -7944,6 +8024,12 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
return -EINVAL;
}
+ if (max_rate && max_rate > speed) {
+ dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
+ i, max_rate, speed);
+ return -EINVAL;
+ }
+
iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
if (rem) {
dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
@@ -7981,12 +8067,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
(mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
return -EINVAL;
- speed = ice_get_link_speed_kbps(vsi);
- if (sum_max_rate && sum_max_rate > (u64)speed) {
- dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n",
- sum_max_rate, speed);
- return -EINVAL;
- }
if (sum_min_rate && sum_min_rate > (u64)speed) {
dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
sum_min_rate, speed);
@@ -8800,6 +8880,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_pf *pf = np->vsi->back;
+ bool locked = false;
int err;
switch (type) {
@@ -8809,10 +8890,32 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
ice_setup_tc_block_cb,
np, np, true);
case TC_SETUP_QDISC_MQPRIO:
+ if (ice_is_eswitch_mode_switchdev(pf)) {
+ netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (pf->adev) {
+ mutex_lock(&pf->adev_mutex);
+ device_lock(&pf->adev->dev);
+ locked = true;
+ if (pf->adev->dev.driver) {
+ netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
+ err = -EBUSY;
+ goto adev_unlock;
+ }
+ }
+
/* setup traffic classifier for receive side */
mutex_lock(&pf->tc_mutex);
err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
mutex_unlock(&pf->tc_mutex);
+
+adev_unlock:
+ if (locked) {
+ device_unlock(&pf->adev->dev);
+ mutex_unlock(&pf->adev_mutex);
+ }
return err;
default:
return -EOPNOTSUPP;