diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_main.c')
| -rw-r--r-- | drivers/net/ethernet/intel/ice/ice_main.c | 1100 |
1 files changed, 615 insertions, 485 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ec636be4d17d..4bb68e7a00f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -14,7 +14,8 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" #include "devlink/devlink.h" -#include "devlink/devlink_port.h" +#include "devlink/port.h" +#include "ice_sf_eth.h" #include "ice_hwmon.h" /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the * ice tracepoint functions. This must be done exactly once across the @@ -36,7 +37,11 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; #define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg" MODULE_DESCRIPTION(DRV_SUMMARY); -MODULE_IMPORT_NS(LIBIE); +MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); +MODULE_IMPORT_NS("LIBIE"); +MODULE_IMPORT_NS("LIBIE_ADMINQ"); +MODULE_IMPORT_NS("LIBIE_FWLOG"); MODULE_LICENSE("GPL v2"); MODULE_FIRMWARE(ICE_DDP_PKG_FILE); @@ -86,7 +91,8 @@ ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, bool netif_is_ice(const struct net_device *dev) { - return dev && (dev->netdev_ops == &ice_netdev_ops); + return dev && (dev->netdev_ops == &ice_netdev_ops || + dev->netdev_ops == &ice_netdev_safe_mode_ops); } /** @@ -377,7 +383,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) * should go into promiscuous mode. There should be some * space reserved for promiscuous filters. */ - if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC && + if (hw->adminq.sq_last_status == LIBIE_AQ_RC_ENOSPC && !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC, vsi->state)) { promisc_forced_on = true; @@ -520,25 +526,6 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) } /** - * ice_clear_sw_switch_recipes - clear switch recipes - * @pf: board private structure - * - * Mark switch recipes as not created in sw structures. 
There are cases where - * rules (especially advanced rules) need to be restored, either re-read from - * hardware or added again. For example after the reset. 'recp_created' flag - * prevents from doing that and need to be cleared upfront. - */ -static void ice_clear_sw_switch_recipes(struct ice_pf *pf) -{ - struct ice_sw_recipe *recp; - u8 i; - - recp = pf->hw.switch_info->recp_list; - for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) - recp[i].recp_created = false; -} - -/** * ice_prepare_for_reset - prep for reset * @pf: board private structure * @reset_type: reset type requested @@ -559,6 +546,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; + synchronize_irq(pf->oicr_irq.virq); + ice_unplug_aux_dev(pf); /* Notify VFs of impending reset */ @@ -572,8 +561,9 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) mutex_unlock(&pf->vfs.table_lock); if (ice_is_eswitch_mode_switchdev(pf)) { - if (reset_type != ICE_RESET_PFR) - ice_clear_sw_switch_recipes(pf); + rtnl_lock(); + ice_eswitch_br_fdb_flush(pf->eswitch.br_offloads->bridge); + rtnl_unlock(); } /* release ADQ specific HW and SW resources */ @@ -606,11 +596,15 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt)); } } + + if (vsi->netdev) + netif_device_detach(vsi->netdev); skip: /* clear SW filtering DB */ ice_clear_hw_tbls(hw); /* disable the VSIs and their queues that are not already DOWN */ + set_bit(ICE_VSI_REBUILD_PENDING, ice_get_main_vsi(pf)->state); ice_pf_dis_all_vsi(pf, false); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) @@ -1129,7 +1123,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (status) dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n", pi->lport, status, - ice_aq_str(pi->hw->adminq.sq_last_status)); + libie_aq_str(pi->hw->adminq.sq_last_status)); 
ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); @@ -1154,7 +1148,10 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (link_up == old_link && link_speed == old_link_speed) return 0; - ice_ptp_link_change(pf, pf->hw.pf_id, link_up); + if (!link_up && old_link) + pf->link_down_events++; + + ice_ptp_link_change(pf, link_up); if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -1257,32 +1254,6 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event) } /** - * ice_get_fwlog_data - copy the FW log data from ARQ event - * @pf: PF that the FW log event is associated with - * @event: event structure containing FW log data - */ -static void -ice_get_fwlog_data(struct ice_pf *pf, struct ice_rq_event_info *event) -{ - struct ice_fwlog_data *fwlog; - struct ice_hw *hw = &pf->hw; - - fwlog = &hw->fwlog_ring.rings[hw->fwlog_ring.tail]; - - memset(fwlog->data, 0, PAGE_SIZE); - fwlog->data_size = le16_to_cpu(event->desc.datalen); - - memcpy(fwlog->data, event->msg_buf, fwlog->data_size); - ice_fwlog_ring_increment(&hw->fwlog_ring.tail, hw->fwlog_ring.size); - - if (ice_fwlog_ring_full(&hw->fwlog_ring)) { - /* the rings are full so bump the head to create room */ - ice_fwlog_ring_increment(&hw->fwlog_ring.head, - hw->fwlog_ring.size); - } -} - -/** * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware * @pf: pointer to the PF private structure * @task: intermediate helper storage and identifier for waiting @@ -1556,19 +1527,31 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) ice_vf_lan_overflow_event(pf, &event); break; case ice_mbx_opc_send_msg_to_pf: - data.num_msg_proc = i; - data.num_pending_arq = pending; - data.max_num_msgs_mbx = hw->mailboxq.num_rq_entries; - data.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; + if (ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) { + ice_vc_process_vf_msg(pf, &event, NULL); + ice_mbx_vf_dec_trig_e830(hw, 
&event); + } else { + u16 val = hw->mailboxq.num_rq_entries; - ice_vc_process_vf_msg(pf, &event, &data); + data.max_num_msgs_mbx = val; + val = ICE_MBX_OVERFLOW_WATERMARK; + data.async_watermark_val = val; + data.num_msg_proc = i; + data.num_pending_arq = pending; + + ice_vc_process_vf_msg(pf, &event, &data); + } break; case ice_aqc_opc_fw_logs_event: - ice_get_fwlog_data(pf, &event); + libie_get_fwlog_data(&hw->fwlog, event.msg_buf, + le16_to_cpu(event.desc.datalen)); break; case ice_aqc_opc_lldp_set_mib_change: ice_dcb_process_lldp_set_mib_change(pf, &event); break; + case ice_aqc_opc_get_health_status: + ice_process_health_status_event(pf, &event); + break; default: dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n", qtype, opcode); @@ -1716,7 +1699,7 @@ static int ice_service_task_stop(struct ice_pf *pf) ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state); if (pf->serv_tmr.function) - del_timer_sync(&pf->serv_tmr); + timer_delete_sync(&pf->serv_tmr); if (pf->serv_task.func) cancel_work_sync(&pf->serv_task); @@ -1742,7 +1725,7 @@ static void ice_service_task_restart(struct ice_pf *pf) */ static void ice_service_timer(struct timer_list *t) { - struct ice_pf *pf = from_timer(pf, t, serv_tmr); + struct ice_pf *pf = timer_container_of(pf, t, serv_tmr); mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies)); ice_service_task_schedule(pf); @@ -1818,6 +1801,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_TX_PQM, pf_num, vf_num, + event, queue); wr32(hw, GL_MDET_TX_PQM, 0xffffffff); } @@ -1831,6 +1816,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_TX_TCLAN, pf_num, 
vf_num, + event, queue); wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX); } @@ -1844,6 +1831,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_RX, pf_num, vf_num, event, + queue); wr32(hw, GL_MDET_RX, 0xffffffff); } @@ -2357,6 +2346,18 @@ static void ice_check_media_subtask(struct ice_pf *pf) } } +static void ice_service_task_recovery_mode(struct work_struct *work) +{ + struct ice_pf *pf = container_of(work, struct ice_pf, serv_task); + + set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); + ice_clean_adminq_subtask(pf); + + ice_service_task_complete(pf); + + mod_timer(&pf->serv_tmr, jiffies + msecs_to_jiffies(100)); +} + /** * ice_service_task - manage and run subtasks * @work: pointer to work_struct contained by the PF struct @@ -2366,9 +2367,11 @@ static void ice_service_task(struct work_struct *work) struct ice_pf *pf = container_of(work, struct ice_pf, serv_task); unsigned long start_time = jiffies; - /* subtasks */ + if (pf->health_reporters.tx_hang_buf.tx_ring) { + ice_report_tx_hang(pf); + pf->health_reporters.tx_hang_buf.tx_ring = NULL; + } - /* process reset requests first */ ice_reset_subtask(pf); /* bail if a reset/recovery cycle is pending or rebuild failed */ @@ -2380,11 +2383,11 @@ static void ice_service_task(struct work_struct *work) } if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) { - struct iidc_event *event; + struct iidc_rdma_event *event; event = kzalloc(sizeof(*event), GFP_KERNEL); if (event) { - set_bit(IIDC_EVENT_CRIT_ERR, event->type); + set_bit(IIDC_RDMA_EVENT_CRIT_ERR, event->type); /* report the entire OICR value to AUX driver */ swap(event->reg, pf->oicr_err_reg); ice_send_event_to_aux(pf, event); @@ -2403,11 +2406,11 @@ static void ice_service_task(struct work_struct *work) ice_plug_aux_dev(pf); if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, 
pf->flags)) { - struct iidc_event *event; + struct iidc_rdma_event *event; event = kzalloc(sizeof(*event), GFP_KERNEL); if (event) { - set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type); + set_bit(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE, event->type); ice_send_event_to_aux(pf, event); kfree(event); } @@ -2507,34 +2510,6 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) } /** - * ice_irq_affinity_notify - Callback for affinity changes - * @notify: context as to what irq was changed - * @mask: the new affinity mask - * - * This is a callback function used by the irq_set_affinity_notifier function - * so that we may register to receive changes to the irq affinity masks. - */ -static void -ice_irq_affinity_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct ice_q_vector *q_vector = - container_of(notify, struct ice_q_vector, affinity_notify); - - cpumask_copy(&q_vector->affinity_mask, mask); -} - -/** - * ice_irq_affinity_release - Callback for affinity notifier release - * @ref: internal core kernel usage - * - * This is a callback function used by the irq_set_affinity_notifier function - * to inform the current notification subscriber that they will no longer - * receive notifications. 
- */ -static void ice_irq_affinity_release(struct kref __always_unused *ref) {} - -/** * ice_vsi_ena_irq - Enable IRQ for the given VSI * @vsi: the VSI being configured */ @@ -2597,19 +2572,6 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) err); goto free_q_irqs; } - - /* register for affinity change notifications */ - if (!IS_ENABLED(CONFIG_RFS_ACCEL)) { - struct irq_affinity_notify *affinity_notify; - - affinity_notify = &q_vector->affinity_notify; - affinity_notify->notify = ice_irq_affinity_notify; - affinity_notify->release = ice_irq_affinity_release; - irq_set_affinity_notifier(irq_num, affinity_notify); - } - - /* assign the mask for this irq */ - irq_update_affinity_hint(irq_num, &q_vector->affinity_mask); } err = ice_set_cpu_rx_rmap(vsi); @@ -2625,9 +2587,6 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) free_q_irqs: while (vector--) { irq_num = vsi->q_vectors[vector]->irq.virq; - if (!IS_ENABLED(CONFIG_RFS_ACCEL)) - irq_set_affinity_notifier(irq_num, NULL); - irq_update_affinity_hint(irq_num, NULL); devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]); } return err; @@ -2764,6 +2723,27 @@ void ice_map_xdp_rings(struct ice_vsi *vsi) } /** + * ice_unmap_xdp_rings - Unmap XDP rings from interrupt vectors + * @vsi: the VSI with XDP rings being unmapped + */ +static void ice_unmap_xdp_rings(struct ice_vsi *vsi) +{ + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + struct ice_tx_ring *ring; + + ice_for_each_tx_ring(ring, q_vector->tx) + if (!ring->tx_buf || !ice_ring_is_xdp(ring)) + break; + + /* restore the value of last node prior to XDP setup */ + q_vector->tx.tx_ring = ring; + } +} + +/** * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP * @vsi: VSI to bring up Tx rings used by XDP * @prog: bpf program that will be assigned to VSI @@ -2826,7 +2806,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, if 
(status) { dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n", status); - goto clear_xdp_rings; + goto unmap_xdp_rings; } /* assign the prog only when it's not already present on VSI; @@ -2842,6 +2822,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, ice_vsi_assign_bpf_prog(vsi, prog); return 0; +unmap_xdp_rings: + ice_unmap_xdp_rings(vsi); clear_xdp_rings: ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { @@ -2858,6 +2840,8 @@ err_map_xdp: mutex_unlock(&pf->avail_q_mutex); devm_kfree(dev, vsi->xdp_rings); + vsi->xdp_rings = NULL; + return -ENOMEM; } @@ -2873,7 +2857,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_pf *pf = vsi->back; - int i, v_idx; + int i; /* q_vectors are freed in reset path so there's no point in detaching * rings @@ -2881,17 +2865,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) if (cfg_type == ICE_XDP_CFG_PART) goto free_qmap; - ice_for_each_q_vector(vsi, v_idx) { - struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct ice_tx_ring *ring; - - ice_for_each_tx_ring(ring, q_vector->tx) - if (!ring->tx_buf || !ice_ring_is_xdp(ring)) - break; - - /* restore the value of last node prior to XDP setup */ - q_vector->tx.tx_ring = ring; - } + ice_unmap_xdp_rings(vsi); free_qmap: mutex_lock(&pf->avail_q_mutex); @@ -2948,7 +2922,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) { struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; - if (rx_ring->xsk_pool) + if (READ_ONCE(rx_ring->xsk_pool)) napi_schedule(&rx_ring->q_vector->napi); } } @@ -2968,6 +2942,9 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) if (avail < cpus / 2) return -ENOMEM; + if (vsi->type == ICE_VSI_SF) + avail = vsi->alloc_txq; + vsi->num_xdp_txq = min_t(u16, avail, cpus); if (vsi->num_xdp_txq < cpus) @@ -2982,10 +2959,7 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) */ 
static int ice_max_xdp_frame_size(struct ice_vsi *vsi) { - if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) - return ICE_RXBUF_1664; - else - return ICE_RXBUF_3072; + return ICE_RXBUF_3072; } /** @@ -2999,8 +2973,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, struct netlink_ext_ack *extack) { unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; - bool if_running = netif_running(vsi->netdev); int ret = 0, xdp_ring_err = 0; + bool if_running; if (prog && !prog->aux->xdp_has_frags) { if (frame_size > ice_max_xdp_frame_size(vsi)) { @@ -3011,13 +2985,17 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } /* hot swap progs and avoid toggling link */ - if (ice_is_xdp_ena_vsi(vsi) == !!prog) { + if (ice_is_xdp_ena_vsi(vsi) == !!prog || + test_bit(ICE_VSI_REBUILD_PENDING, vsi->state)) { ice_vsi_assign_bpf_prog(vsi, prog); return 0; } + if_running = netif_running(vsi->netdev) && + !test_and_set_bit(ICE_VSI_DOWN, vsi->state); + /* need to stop netdev while setting up the program for Rx rings */ - if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { + if (if_running) { ret = ice_down(vsi); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); @@ -3029,28 +3007,24 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, xdp_ring_err = ice_vsi_determine_xdp_res(vsi); if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); + goto resume_if; } else { xdp_ring_err = ice_prepare_xdp_rings(vsi, prog, ICE_XDP_CFG_FULL); - if (xdp_ring_err) + if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + goto resume_if; + } } xdp_features_set_redirect_target(vsi->netdev, true); - /* reallocate Rx queues that are used for zero-copy */ - xdp_ring_err = ice_realloc_zc_buf(vsi, true); - if (xdp_ring_err) - NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { 
xdp_features_clear_redirect_target(vsi->netdev); xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); - /* reallocate Rx queues that were used for zero-copy */ - xdp_ring_err = ice_realloc_zc_buf(vsi, false); - if (xdp_ring_err) - NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); } +resume_if: if (if_running) ret = ice_up(vsi); @@ -3079,25 +3053,32 @@ static int ice_xdp_safe_mode(struct net_device __always_unused *dev, * @dev: netdevice * @xdp: XDP command */ -static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) +int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct ice_netdev_priv *np = netdev_priv(dev); struct ice_vsi *vsi = np->vsi; + int ret; - if (vsi->type != ICE_VSI_PF) { - NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI"); + if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_SF) { + NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF or SF VSI"); return -EINVAL; } + mutex_lock(&vsi->xdp_state_lock); + switch (xdp->command) { case XDP_SETUP_PROG: - return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + ret = ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + break; case XDP_SETUP_XSK_POOL: - return ice_xsk_pool_setup(vsi, xdp->xsk.pool, - xdp->xsk.queue_id); + ret = ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); + break; default: - return -EINVAL; + ret = -EINVAL; } + + mutex_unlock(&vsi->xdp_state_lock); + return ret; } /** @@ -3162,12 +3143,14 @@ static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data) hw = &pf->hw; tx = &pf->ptp.port.tx; spin_lock_irqsave(&tx->lock, flags); - ice_ptp_complete_tx_single_tstamp(tx); + if (tx->init) { + ice_ptp_complete_tx_single_tstamp(tx); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); + idx = find_next_bit_wrap(tx->in_use, tx->len, + 
tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } spin_unlock_irqrestore(&tx->lock, flags); val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | @@ -3269,22 +3252,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - if (ice_pf_state_is_nominal(pf) && - pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) { - struct ice_ptp_tx *tx = &pf->ptp.port.tx; - unsigned long flags; - u8 idx; - - spin_lock_irqsave(&tx->lock, flags); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); - spin_unlock_irqrestore(&tx->lock, flags); - } else if (ice_ptp_pf_handles_tx_interrupt(pf)) { - set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); - ret = IRQ_WAKE_THREAD; - } + + ret = ice_ptp_ts_irq(pf); } if (oicr & PFINT_OICR_TSYN_EVNT_M) { @@ -3539,28 +3508,6 @@ skip_req_irq: } /** - * ice_napi_add - register NAPI handler for the VSI - * @vsi: VSI for which NAPI handler is to be registered - * - * This function is only called in the driver's load path. Registering the NAPI - * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume, - * reset/rebuild, etc.) 
- */ -static void ice_napi_add(struct ice_vsi *vsi) -{ - int v_idx; - - if (!vsi->netdev) - return; - - ice_for_each_q_vector(vsi, v_idx) { - netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, - ice_napi_poll); - __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); - } -} - -/** * ice_set_ops - set netdev and ethtools ops for the given netdev * @vsi: the VSI associated with the new netdev */ @@ -3593,7 +3540,7 @@ static void ice_set_ops(struct ice_vsi *vsi) * ice_set_netdev_features - set features for the given netdev * @netdev: netdev instance */ -static void ice_set_netdev_features(struct net_device *netdev) +void ice_set_netdev_features(struct net_device *netdev) { struct ice_pf *pf = ice_netdev_to_pf(netdev); bool is_dvm_ena = ice_is_dvm_ena(&pf->hw); @@ -3676,6 +3623,15 @@ static void ice_set_netdev_features(struct net_device *netdev) */ netdev->hw_features |= NETIF_F_RXFCS; + /* Allow core to manage IRQs affinity */ + netif_set_affinity_auto(netdev); + + /* Mutual exclusivity for TSO and GCS is enforced by the set features + * ndo callback. 
+ */ + if (ice_is_feature_supported(pf, ICE_F_GCS)) + netdev->hw_features |= NETIF_F_HW_CSUM; + netif_set_tso_max_size(netdev, ICE_MAX_TSO_SIZE); } @@ -3775,8 +3731,7 @@ ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) * * net_device_ops implementation for adding VLAN IDs */ -static int -ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) +int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi_vlan_ops *vlan_ops; @@ -3838,8 +3793,7 @@ finish: * * net_device_ops implementation for removing VLAN IDs */ -static int -ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) +int ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi_vlan_ops *vlan_ops; @@ -3986,9 +3940,10 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) * ice_deinit_pf - Unrolls initialziations done by ice_init_pf * @pf: board private structure to initialize */ -static void ice_deinit_pf(struct ice_pf *pf) +void ice_deinit_pf(struct ice_pf *pf) { - ice_service_task_stop(pf); + /* note that we unroll also on ice_init_pf() failure here */ + mutex_destroy(&pf->lag_mutex); mutex_destroy(&pf->adev_mutex); mutex_destroy(&pf->sw_mutex); @@ -4006,8 +3961,19 @@ static void ice_deinit_pf(struct ice_pf *pf) pf->avail_rxqs = NULL; } + if (pf->txtime_txqs) { + bitmap_free(pf->txtime_txqs); + pf->txtime_txqs = NULL; + } + if (pf->ptp.clock) ptp_clock_unregister(pf->ptp.clock); + + if (!xa_empty(&pf->irq_tracker.entries)) + ice_free_irq_msix_misc(pf); + + xa_destroy(&pf->dyn_ports); + xa_destroy(&pf->sf_nums); } /** @@ -4052,21 +4018,32 @@ static void ice_set_pf_caps(struct ice_pf *pf) } clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); - if (func_caps->common_cap.ieee_1588 && - !(pf->hw.mac_type == ICE_MAC_E830)) + if (func_caps->common_cap.ieee_1588) set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); pf->max_pf_txqs 
= func_caps->common_cap.num_txq; pf->max_pf_rxqs = func_caps->common_cap.num_rxq; } +void ice_start_service_task(struct ice_pf *pf) +{ + timer_setup(&pf->serv_tmr, ice_service_timer, 0); + pf->serv_tmr_period = HZ; + INIT_WORK(&pf->serv_task, ice_service_task); + clear_bit(ICE_SERVICE_SCHED, pf->state); +} + /** * ice_init_pf - Initialize general software structures (struct ice_pf) * @pf: board private structure to initialize + * Return: 0 on success, negative errno otherwise. */ -static int ice_init_pf(struct ice_pf *pf) +int ice_init_pf(struct ice_pf *pf) { - ice_set_pf_caps(pf); + struct udp_tunnel_nic_info *udp_tunnel_nic = &pf->hw.udp_tunnel_nic; + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + int err = -ENOMEM; mutex_init(&pf->sw_mutex); mutex_init(&pf->tc_mutex); @@ -4079,29 +4056,49 @@ static int ice_init_pf(struct ice_pf *pf) init_waitqueue_head(&pf->reset_wait_queue); - /* setup service timer and periodic service task */ - timer_setup(&pf->serv_tmr, ice_service_timer, 0); - pf->serv_tmr_period = HZ; - INIT_WORK(&pf->serv_task, ice_service_task); - clear_bit(ICE_SERVICE_SCHED, pf->state); - mutex_init(&pf->avail_q_mutex); - pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); - if (!pf->avail_txqs) - return -ENOMEM; - - pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); - if (!pf->avail_rxqs) { - bitmap_free(pf->avail_txqs); - pf->avail_txqs = NULL; - return -ENOMEM; - } mutex_init(&pf->vfs.table_lock); hash_init(pf->vfs.table); - ice_mbx_init_snapshot(&pf->hw); + if (ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) + wr32(&pf->hw, E830_MBX_PF_IN_FLIGHT_VF_MSGS_THRESH, + ICE_MBX_OVERFLOW_WATERMARK); + else + ice_mbx_init_snapshot(&pf->hw); + + xa_init(&pf->dyn_ports); + xa_init(&pf->sf_nums); + + pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); + pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); + pf->txtime_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); + if (!pf->avail_txqs || 
!pf->avail_rxqs || !pf->txtime_txqs) + goto undo_init; + + udp_tunnel_nic->set_port = ice_udp_tunnel_set_port; + udp_tunnel_nic->unset_port = ice_udp_tunnel_unset_port; + udp_tunnel_nic->shared = &hw->udp_tunnel_shared; + udp_tunnel_nic->tables[0].n_entries = hw->tnl.valid_count[TNL_VXLAN]; + udp_tunnel_nic->tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; + udp_tunnel_nic->tables[1].n_entries = hw->tnl.valid_count[TNL_GENEVE]; + udp_tunnel_nic->tables[1].tunnel_types = UDP_TUNNEL_TYPE_GENEVE; + + /* In case of MSIX we are going to setup the misc vector right here + * to handle admin queue events etc. In case of legacy and MSI + * the misc functionality and queue processing is combined in + * the same vector and that gets setup at open. + */ + err = ice_req_irq_msix_misc(pf); + if (err) { + dev_err(dev, "setup of misc vector failed: %d\n", err); + goto undo_init; + } return 0; +undo_init: + /* deinit handles half-initialized pf just fine */ + ice_deinit_pf(pf); + return err; } /** @@ -4231,7 +4228,7 @@ static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n", - status, ice_aq_str(hw->adminq.sq_last_status)); + status, libie_aq_str(hw->adminq.sq_last_status)); } else { vsi->info.sec_flags = ctxt->info.sec_flags; vsi->info.sw_flags2 = ctxt->info.sw_flags2; @@ -4533,36 +4530,64 @@ ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware) u8 num_tx_sched_layers = hw->num_tx_sched_layers; struct ice_pf *pf = hw->back; struct device *dev; - u8 *buf_copy; int err; dev = ice_pf_to_dev(pf); - /* ice_cfg_tx_topo buf argument is not a constant, - * so we have to make a copy - */ - buf_copy = kmemdup(firmware->data, firmware->size, GFP_KERNEL); - - err = ice_cfg_tx_topo(hw, buf_copy, firmware->size); + err = ice_cfg_tx_topo(hw, firmware->data, firmware->size); if (!err) { if (hw->num_tx_sched_layers > 
num_tx_sched_layers) dev_info(dev, "Tx scheduling layers switching feature disabled\n"); else dev_info(dev, "Tx scheduling layers switching feature enabled\n"); - /* if there was a change in topology ice_cfg_tx_topo triggered - * a CORER and we need to re-init hw + return 0; + } else if (err == -ENODEV) { + /* If we failed to re-initialize the device, we can no longer + * continue loading. */ - ice_deinit_hw(hw); - err = ice_init_hw(hw); - + dev_warn(dev, "Failed to initialize hardware after applying Tx scheduling configuration.\n"); return err; } else if (err == -EIO) { dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n"); + return 0; + } else if (err == -EEXIST) { + return 0; } + /* Do not treat this as a fatal error. */ + dev_info(dev, "Failed to apply Tx scheduling configuration, err %pe\n", + ERR_PTR(err)); return 0; } /** + * ice_init_supported_rxdids - Initialize supported Rx descriptor IDs + * @hw: pointer to the hardware structure + * @pf: pointer to pf structure + * + * The pf->supported_rxdids bitmap is used to indicate to VFs which descriptor + * formats the PF hardware supports. The exact list of supported RXDIDs + * depends on the loaded DDP package. The IDs can be determined by reading the + * GLFLXP_RXDID_FLAGS register after the DDP package is loaded. + * + * Note that the legacy 32-byte RXDID 0 is always supported but is not listed + * in the DDP package. The 16-byte legacy descriptor is never supported by + * VFs. 
+ */ +static void ice_init_supported_rxdids(struct ice_hw *hw, struct ice_pf *pf) +{ + pf->supported_rxdids = BIT(ICE_RXDID_LEGACY_1); + + for (int i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) { + u32 regval; + + regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0)); + if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) + & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) + pf->supported_rxdids |= BIT(i); + } +} + +/** * ice_init_ddp_config - DDP related configuration * @hw: pointer to the hardware structure * @pf: pointer to pf structure @@ -4596,6 +4621,9 @@ static int ice_init_ddp_config(struct ice_hw *hw, struct ice_pf *pf) ice_load_pkg(firmware, pf); release_firmware(firmware); + /* Initialize the supported Rx descriptor IDs after loading DDP */ + ice_init_supported_rxdids(hw, pf); + return 0; } @@ -4627,19 +4655,6 @@ static void ice_print_wake_reason(struct ice_pf *pf) } /** - * ice_pf_fwlog_update_module - update 1 module - * @pf: pointer to the PF struct - * @log_level: log_level to use for the @module - * @module: module to update - */ -void ice_pf_fwlog_update_module(struct ice_pf *pf, int log_level, int module) -{ - struct ice_hw *hw = &pf->hw; - - hw->fwlog_cfg.module_entries[module].log_level = log_level; -} - -/** * ice_register_netdev - register netdev * @vsi: pointer to the VSI struct */ @@ -4718,66 +4733,20 @@ static void ice_decfg_netdev(struct ice_vsi *vsi) vsi->netdev = NULL; } -/** - * ice_wait_for_fw - wait for full FW readiness - * @hw: pointer to the hardware structure - * @timeout: milliseconds that can elapse before timing out - */ -static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout) -{ - int fw_loading; - u32 elapsed = 0; - - while (elapsed <= timeout) { - fw_loading = rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M; - - /* firmware was not yet loaded, we have to wait more */ - if (fw_loading) { - elapsed += 100; - msleep(100); - continue; - } - return 0; - } - - return -ETIMEDOUT; -} - -int ice_init_dev(struct ice_pf *pf) +void 
ice_init_dev_hw(struct ice_pf *pf) { - struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int err; - err = ice_init_hw(hw); - if (err) { - dev_err(dev, "ice_init_hw failed: %d\n", err); - return err; - } - - /* Some cards require longer initialization times - * due to necessity of loading FW from an external source. - * This can take even half a minute. - */ - if (ice_is_pf_c827(hw)) { - err = ice_wait_for_fw(hw, 30000); - if (err) { - dev_err(dev, "ice_wait_for_fw timed out"); - return err; - } - } - ice_init_feature_support(pf); err = ice_init_ddp_config(hw, pf); - if (err) - return err; /* if ice_init_ddp_config fails, ICE_FLAG_ADV_FEATURES bit won't be * set in pf->state, which will cause ice_is_safe_mode to return * true */ - if (ice_is_safe_mode(pf)) { + if (err || ice_is_safe_mode(pf)) { /* we already got function/device capabilities but these don't * reflect what the driver needs to do in safe mode. Instead of * adding conditional logic everywhere to ignore these @@ -4785,64 +4754,28 @@ int ice_init_dev(struct ice_pf *pf) */ ice_set_safe_mode_caps(hw); } +} - err = ice_init_pf(pf); - if (err) { - dev_err(dev, "ice_init_pf failed: %d\n", err); - goto err_init_pf; - } - - pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; - pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port; - pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; - pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared; - if (pf->hw.tnl.valid_count[TNL_VXLAN]) { - pf->hw.udp_tunnel_nic.tables[0].n_entries = - pf->hw.tnl.valid_count[TNL_VXLAN]; - pf->hw.udp_tunnel_nic.tables[0].tunnel_types = - UDP_TUNNEL_TYPE_VXLAN; - } - if (pf->hw.tnl.valid_count[TNL_GENEVE]) { - pf->hw.udp_tunnel_nic.tables[1].n_entries = - pf->hw.tnl.valid_count[TNL_GENEVE]; - pf->hw.udp_tunnel_nic.tables[1].tunnel_types = - UDP_TUNNEL_TYPE_GENEVE; - } +int ice_init_dev(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + int err; + ice_set_pf_caps(pf); err = 
ice_init_interrupt_scheme(pf); if (err) { dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); - err = -EIO; - goto err_init_interrupt_scheme; + return -EIO; } - /* In case of MSIX we are going to setup the misc vector right here - * to handle admin queue events etc. In case of legacy and MSI - * the misc functionality and queue processing is combined in - * the same vector and that gets setup at open. - */ - err = ice_req_irq_msix_misc(pf); - if (err) { - dev_err(dev, "setup of misc vector failed: %d\n", err); - goto err_req_irq_msix_misc; - } + ice_start_service_task(pf); return 0; - -err_req_irq_msix_misc: - ice_clear_interrupt_scheme(pf); -err_init_interrupt_scheme: - ice_deinit_pf(pf); -err_init_pf: - ice_deinit_hw(hw); - return err; } void ice_deinit_dev(struct ice_pf *pf) { - ice_free_irq_msix_misc(pf); - ice_deinit_pf(pf); - ice_deinit_hw(&pf->hw); + ice_service_task_stop(pf); /* Service task is already stopped, so call reset directly. */ ice_reset(&pf->hw, ICE_RESET_PFR); @@ -5067,12 +5000,14 @@ static int ice_init_devlink(struct ice_pf *pf) ice_devlink_init_regions(pf); ice_devlink_register(pf); + ice_health_init(pf); return 0; } static void ice_deinit_devlink(struct ice_pf *pf) { + ice_health_deinit(pf); ice_devlink_unregister(pf); ice_devlink_destroy_regions(pf); ice_devlink_unregister_params(pf); @@ -5080,15 +5015,24 @@ static void ice_deinit_devlink(struct ice_pf *pf) static int ice_init(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); int err; - err = ice_init_dev(pf); - if (err) + err = ice_init_pf(pf); + if (err) { + dev_err(dev, "ice_init_pf failed: %d\n", err); return err; + } + + if (pf->hw.mac_type == ICE_MAC_E830) { + err = pci_enable_ptm(pf->pdev, NULL); + if (err) + dev_dbg(dev, "PCIe PTM not supported by PCIe bus/controller\n"); + } err = ice_alloc_vsis(pf); if (err) - goto err_alloc_vsis; + goto unroll_pf_init; err = ice_init_pf_sw(pf); if (err) @@ -5125,8 +5069,8 @@ err_init_link: ice_deinit_pf_sw(pf); 
err_init_pf_sw: ice_dealloc_vsis(pf); -err_alloc_vsis: - ice_deinit_dev(pf); +unroll_pf_init: + ice_deinit_pf(pf); return err; } @@ -5137,7 +5081,7 @@ static void ice_deinit(struct ice_pf *pf) ice_deinit_pf_sw(pf); ice_dealloc_vsis(pf); - ice_deinit_dev(pf); + ice_deinit_pf(pf); } /** @@ -5185,11 +5129,12 @@ int ice_load(struct ice_pf *pf) ice_napi_add(vsi); + ice_init_features(pf); + err = ice_init_rdma(pf); if (err) goto err_init_rdma; - ice_init_features(pf); ice_service_task_restart(pf); clear_bit(ICE_DOWN, pf->state); @@ -5197,6 +5142,7 @@ int ice_load(struct ice_pf *pf) return 0; err_init_rdma: + ice_deinit_features(pf); ice_tc_indir_block_unregister(vsi); err_tc_indir_block_register: ice_unregister_netdev(vsi); @@ -5220,14 +5166,44 @@ void ice_unload(struct ice_pf *pf) devl_assert_locked(priv_to_devlink(pf)); - ice_deinit_features(pf); ice_deinit_rdma(pf); + ice_deinit_features(pf); ice_tc_indir_block_unregister(vsi); ice_unregister_netdev(vsi); ice_devlink_destroy_pf_port(pf); ice_decfg_netdev(vsi); } +static int ice_probe_recovery_mode(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + int err; + + dev_err(dev, "Firmware recovery mode detected. Limiting functionality. 
Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode\n"); + + INIT_HLIST_HEAD(&pf->aq_wait_list); + spin_lock_init(&pf->aq_wait_lock); + init_waitqueue_head(&pf->aq_wait_queue); + + timer_setup(&pf->serv_tmr, ice_service_timer, 0); + pf->serv_tmr_period = HZ; + INIT_WORK(&pf->serv_task, ice_service_task_recovery_mode); + clear_bit(ICE_SERVICE_SCHED, pf->state); + err = ice_create_all_ctrlq(&pf->hw); + if (err) + return err; + + scoped_guard(devl, priv_to_devlink(pf)) { + err = ice_init_devlink(pf); + if (err) + return err; + } + + ice_service_task_restart(pf); + + return 0; +} + /** * ice_probe - Device initialization routine * @pdev: PCI device information struct @@ -5239,6 +5215,7 @@ static int ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { struct device *dev = &pdev->dev; + bool need_dev_deinit = false; struct ice_adapter *adapter; struct ice_pf *pf; struct ice_hw *hw; @@ -5291,13 +5268,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) } pci_set_master(pdev); - - adapter = ice_adapter_get(pdev); - if (IS_ERR(adapter)) - return PTR_ERR(adapter); - pf->pdev = pdev; - pf->adapter = adapter; pci_set_drvdata(pdev, pf); set_bit(ICE_DOWN, pf->state); /* Disable service task until DOWN bit is cleared */ @@ -5325,30 +5296,55 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) hw->debug_mask = debug; #endif + if (ice_is_recovery_mode(hw)) + return ice_probe_recovery_mode(pf); + + err = ice_init_hw(hw); + if (err) { + dev_err(dev, "ice_init_hw failed: %d\n", err); + return err; + } + + adapter = ice_adapter_get(pdev); + if (IS_ERR(adapter)) { + err = PTR_ERR(adapter); + goto unroll_hw_init; + } + pf->adapter = adapter; + + err = ice_init_dev(pf); + if (err) + goto unroll_adapter; + err = ice_init(pf); if (err) - goto err_init; + goto unroll_dev_init; devl_lock(priv_to_devlink(pf)); err = ice_load(pf); if (err) - goto 
err_load; + goto unroll_init; err = ice_init_devlink(pf); if (err) - goto err_init_devlink; + goto unroll_load; devl_unlock(priv_to_devlink(pf)); return 0; -err_init_devlink: +unroll_load: ice_unload(pf); -err_load: +unroll_init: devl_unlock(priv_to_devlink(pf)); ice_deinit(pf); -err_init: +unroll_dev_init: + need_dev_deinit = true; +unroll_adapter: ice_adapter_put(pdev); - pci_disable_device(pdev); +unroll_hw_init: + ice_deinit_hw(hw); + if (need_dev_deinit) + ice_deinit_dev(pf); return err; } @@ -5410,7 +5406,7 @@ static void ice_setup_mc_magic_wake(struct ice_pf *pf) status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL); if (status) dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n", - status, ice_aq_str(hw->adminq.sq_last_status)); + status, libie_aq_str(hw->adminq.sq_last_status)); } /** @@ -5428,6 +5424,14 @@ static void ice_remove(struct pci_dev *pdev) msleep(100); } + if (ice_is_recovery_mode(&pf->hw)) { + ice_service_task_stop(pf); + scoped_guard(devl, priv_to_devlink(pf)) { + ice_deinit_devlink(pf); + } + return; + } + if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) { set_bit(ICE_VF_RESETS_DISABLED, pf->state); ice_free_vfs(pf); @@ -5435,14 +5439,11 @@ static void ice_remove(struct pci_dev *pdev) ice_hwmon_exit(pf); - ice_service_task_stop(pf); - ice_aq_cancel_waiting_tasks(pf); - set_bit(ICE_DOWN, pf->state); - if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); devl_lock(priv_to_devlink(pf)); + ice_dealloc_all_dynamic_ports(pf); ice_deinit_devlink(pf); ice_unload(pf); @@ -5455,7 +5456,11 @@ static void ice_remove(struct pci_dev *pdev) ice_set_wake(pf); ice_adapter_put(pdev); - pci_disable_device(pdev); + ice_deinit_hw(&pf->hw); + + ice_deinit_dev(pf); + ice_aq_cancel_waiting_tasks(pf); + set_bit(ICE_DOWN, pf->state); } /** @@ -5535,7 +5540,9 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) if (ret) goto err_reinit; ice_vsi_map_rings_to_vectors(pf->vsi[v]); + rtnl_lock(); ice_vsi_set_napi_queues(pf->vsi[v]); + 
rtnl_unlock(); } ret = ice_req_irq_msix_misc(pf); @@ -5549,8 +5556,12 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) err_reinit: while (v--) - if (pf->vsi[v]) + if (pf->vsi[v]) { + rtnl_lock(); + ice_vsi_clear_napi_queues(pf->vsi[v]); + rtnl_unlock(); ice_vsi_free_q_vectors(pf->vsi[v]); + } return ret; } @@ -5615,6 +5626,9 @@ static int ice_suspend(struct device *dev) ice_for_each_vsi(pf, v) { if (!pf->vsi[v]) continue; + rtnl_lock(); + ice_vsi_clear_napi_queues(pf->vsi[v]); + rtnl_unlock(); ice_vsi_free_q_vectors(pf->vsi[v]); } ice_clear_interrupt_scheme(pf); @@ -5639,7 +5653,6 @@ static int ice_resume(struct device *dev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - pci_save_state(pdev); if (!pci_device_is_present(pdev)) return -ENODEV; @@ -5739,7 +5752,6 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) } else { pci_set_master(pdev); pci_restore_state(pdev); - pci_save_state(pdev); pci_wake_from_d3(pdev, false); /* Check for life */ @@ -5859,6 +5871,15 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_QSFP), }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_SFP), }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_SFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835CC_BACKPLANE), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835CC_QSFP56), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835CC_SFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835C_BACKPLANE), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835C_QSFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835C_SFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835_L_BACKPLANE), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835_L_QSFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835_L_SFP), }, /* required last entry */ {} }; @@ -5902,7 +5923,7 @@ static int __init ice_module_init(void) ice_adv_lnk_speed_maps_init(); - ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME); + ice_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, KBUILD_MODNAME); if (!ice_wq) { pr_err("Failed to create workqueue\n"); 
return status; @@ -5922,8 +5943,16 @@ static int __init ice_module_init(void) goto err_dest_lag_wq; } + status = ice_sf_driver_register(); + if (status) { + pr_err("Failed to register SF driver, err %d\n", status); + goto err_sf_driver; + } + return 0; +err_sf_driver: + pci_unregister_driver(&ice_driver); err_dest_lag_wq: destroy_workqueue(ice_lag_wq); ice_debugfs_exit(); @@ -5941,6 +5970,7 @@ module_init(ice_module_init); */ static void __exit ice_module_exit(void) { + ice_sf_driver_unregister(); pci_unregister_driver(&ice_driver); ice_debugfs_exit(); destroy_workqueue(ice_wq); @@ -6118,12 +6148,14 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) * @addr: the MAC address entry being added * @vid: VLAN ID * @flags: instructions from stack about fdb operation + * @notified: whether notification was emitted * @extack: netlink extended ack */ static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags, struct netlink_ext_ack __always_unused *extack) + u16 flags, bool *notified, + struct netlink_ext_ack __always_unused *extack) { int err; @@ -6157,12 +6189,14 @@ ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], * @dev: the net device pointer * @addr: the MAC address entry being added * @vid: VLAN ID + * @notified: whether notification was emitted * @extack: netlink extended ack */ static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - __always_unused u16 vid, struct netlink_ext_ack *extack) + __always_unused u16 vid, bool *notified, + struct netlink_ext_ack *extack) { int err; @@ -6366,10 +6400,12 @@ ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features) int err = 0; /* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking - * if either bit is set + * if either bit is set. 
In switchdev mode Rx filtering should never be + * enabled. */ - if (features & - (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) + if ((features & + (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) && + !ice_is_eswitch_mode_switchdev(vsi->back)) err = vlan_ops->ena_rx_filtering(vsi); else err = vlan_ops->dis_rx_filtering(vsi); @@ -6517,13 +6553,24 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) if (changed & NETIF_F_HW_TC) { bool ena = !!(features & NETIF_F_HW_TC); - ena ? set_bit(ICE_FLAG_CLS_FLOWER, pf->flags) : - clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + assign_bit(ICE_FLAG_CLS_FLOWER, pf->flags, ena); } if (changed & NETIF_F_LOOPBACK) ret = ice_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK)); + /* Due to E830 hardware limitations, TSO (NETIF_F_ALL_TSO) with GCS + * (NETIF_F_HW_CSUM) is not supported. + */ + if (ice_is_feature_supported(pf, ICE_F_GCS) && + ((features & NETIF_F_HW_CSUM) && (features & NETIF_F_ALL_TSO))) { + if (netdev->features & NETIF_F_HW_CSUM) + dev_err(ice_pf_to_dev(pf), "To enable TSO, you must first disable HW checksum.\n"); + else + dev_err(ice_pf_to_dev(pf), "To enable HW checksum, you must first disable TSO.\n"); + return -EIO; + } + return ret; } @@ -6742,11 +6789,12 @@ static int ice_up_complete(struct ice_vsi *vsi) if (vsi->port_info && (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) && - vsi->netdev && vsi->type == ICE_VSI_PF) { + ((vsi->netdev && (vsi->type == ICE_VSI_PF || + vsi->type == ICE_VSI_SF)))) { ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - ice_ptp_link_change(pf, pf->hw.pf_id, true); + ice_ptp_link_change(pf, true); } /* Perform an initial read of the statistics registers now to @@ -7078,6 +7126,9 @@ void ice_update_pf_stats(struct ice_pf *pf) &prev_ps->mac_remote_faults, &cur_ps->mac_remote_faults); + ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded, + &prev_ps->rx_len_errors, 
&cur_ps->rx_len_errors); + ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded, &prev_ps->rx_undersize, &cur_ps->rx_undersize); @@ -7100,7 +7151,6 @@ void ice_update_pf_stats(struct ice_pf *pf) * @netdev: network interface device structure * @stats: main device statistics structure */ -static void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct ice_netdev_priv *np = netdev_priv(netdev); @@ -7217,7 +7267,7 @@ int ice_down(struct ice_vsi *vsi) if (vsi->netdev) { vlan_err = ice_vsi_del_vlan_zero(vsi); - ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); + ice_ptp_link_change(vsi->back, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } @@ -7228,7 +7278,7 @@ int ice_down(struct ice_vsi *vsi) if (tx_err) netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n", vsi->vsi_num, tx_err); - if (!tx_err && ice_is_xdp_ena_vsi(vsi)) { + if (!tx_err && vsi->xdp_rings) { tx_err = ice_vsi_stop_xdp_tx_rings(vsi); if (tx_err) netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n", @@ -7245,7 +7295,7 @@ int ice_down(struct ice_vsi *vsi) ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); - if (ice_is_xdp_ena_vsi(vsi)) + if (vsi->xdp_rings) ice_for_each_xdp_txq(vsi, i) ice_clean_tx_ring(vsi->xdp_rings[i]); @@ -7439,9 +7489,10 @@ int ice_vsi_open(struct ice_vsi *vsi) if (err) goto err_setup_rx; - ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); + if (bitmap_empty(pf->txtime_txqs, pf->max_pf_txqs)) + ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); - if (vsi->type == ICE_VSI_PF) { + if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_SF) { /* Notify the stack of the actual queue counts. 
*/ err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); if (err) @@ -7450,6 +7501,8 @@ int ice_vsi_open(struct ice_vsi *vsi) err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq); if (err) goto err_set_qs; + + ice_vsi_set_napi_queues(vsi); } err = ice_up_complete(vsi); @@ -7587,6 +7640,7 @@ static void ice_update_pf_netdev_link(struct ice_pf *pf) */ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) { + struct ice_vsi *vsi = ice_get_main_vsi(pf); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; bool dvm; @@ -7729,6 +7783,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_rebuild_arfs(pf); } + if (vsi && vsi->netdev) + netif_device_attach(vsi->netdev); + ice_update_pf_netdev_link(pf); /* tell the firmware we are up */ @@ -7744,6 +7801,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) /* if we get here, reset flow is successful */ clear_bit(ICE_RESET_FAILED, pf->state); + ice_health_clear(pf); + ice_plug_aux_dev(pf); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) ice_lag_rebuild(pf); @@ -7771,7 +7830,7 @@ clear_recovery: * * Returns 0 on success, negative on failure */ -static int ice_change_mtu(struct net_device *netdev, int new_mtu) +int ice_change_mtu(struct net_device *netdev, int new_mtu) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@ -7794,12 +7853,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) frame_size - ICE_ETH_PKT_HDR_PAD); return -EINVAL; } - } else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) { - if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) { - netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n", - ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD); - return -EINVAL; - } } /* if a reset is in progress, wait for some time for it to complete */ @@ -7830,69 +7883,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } /** - * 
ice_eth_ioctl - Access the hwtstamp interface - * @netdev: network interface device structure - * @ifr: interface request data - * @cmd: ioctl command - */ -static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; - - switch (cmd) { - case SIOCGHWTSTAMP: - return ice_ptp_get_ts_config(pf, ifr); - case SIOCSHWTSTAMP: - return ice_ptp_set_ts_config(pf, ifr); - default: - return -EOPNOTSUPP; - } -} - -/** - * ice_aq_str - convert AQ err code to a string - * @aq_err: the AQ error code to convert - */ -const char *ice_aq_str(enum ice_aq_err aq_err) -{ - switch (aq_err) { - case ICE_AQ_RC_OK: - return "OK"; - case ICE_AQ_RC_EPERM: - return "ICE_AQ_RC_EPERM"; - case ICE_AQ_RC_ENOENT: - return "ICE_AQ_RC_ENOENT"; - case ICE_AQ_RC_ENOMEM: - return "ICE_AQ_RC_ENOMEM"; - case ICE_AQ_RC_EBUSY: - return "ICE_AQ_RC_EBUSY"; - case ICE_AQ_RC_EEXIST: - return "ICE_AQ_RC_EEXIST"; - case ICE_AQ_RC_EINVAL: - return "ICE_AQ_RC_EINVAL"; - case ICE_AQ_RC_ENOSPC: - return "ICE_AQ_RC_ENOSPC"; - case ICE_AQ_RC_ENOSYS: - return "ICE_AQ_RC_ENOSYS"; - case ICE_AQ_RC_EMODE: - return "ICE_AQ_RC_EMODE"; - case ICE_AQ_RC_ENOSEC: - return "ICE_AQ_RC_ENOSEC"; - case ICE_AQ_RC_EBADSIG: - return "ICE_AQ_RC_EBADSIG"; - case ICE_AQ_RC_ESVN: - return "ICE_AQ_RC_ESVN"; - case ICE_AQ_RC_EBADMAN: - return "ICE_AQ_RC_EBADMAN"; - case ICE_AQ_RC_EBADBUF: - return "ICE_AQ_RC_EBADBUF"; - } - - return "ICE_AQ_RC_UNKNOWN"; -} - -/** * ice_set_rss_lut - Set RSS LUT * @vsi: Pointer to VSI structure * @lut: Lookup table @@ -7917,7 +7907,7 @@ int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) status = ice_aq_set_rss_lut(hw, &params); if (status) dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %d aq_err %s\n", - status, ice_aq_str(hw->adminq.sq_last_status)); + status, libie_aq_str(hw->adminq.sq_last_status)); return status; } @@ -7940,7 +7930,7 @@ int ice_set_rss_key(struct ice_vsi
*vsi, u8 *seed) status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); if (status) dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n", - status, ice_aq_str(hw->adminq.sq_last_status)); + status, libie_aq_str(hw->adminq.sq_last_status)); return status; } @@ -7970,7 +7960,7 @@ int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) status = ice_aq_get_rss_lut(hw, &params); if (status) dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %d aq_err %s\n", - status, ice_aq_str(hw->adminq.sq_last_status)); + status, libie_aq_str(hw->adminq.sq_last_status)); return status; } @@ -7993,7 +7983,7 @@ int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed) status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); if (status) dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %d aq_err %s\n", - status, ice_aq_str(hw->adminq.sq_last_status)); + status, libie_aq_str(hw->adminq.sq_last_status)); return status; } @@ -8066,9 +8056,7 @@ static int ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags) { - struct ice_netdev_priv *np = netdev_priv(dev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(dev); u16 bmode; bmode = pf->first_sw->bridge_mode; @@ -8110,7 +8098,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (ret) { dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n", - bmode, ret, ice_aq_str(hw->adminq.sq_last_status)); + bmode, ret, libie_aq_str(hw->adminq.sq_last_status)); goto out; } /* Update sw flags for book keeping */ @@ -8138,8 +8126,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 __always_unused flags, struct netlink_ext_ack __always_unused *extack) { - struct ice_netdev_priv *np = netdev_priv(dev); - struct ice_pf
*pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(dev); struct nlattr *attr, *br_spec; struct ice_hw *hw = &pf->hw; struct ice_sw *pf_sw; @@ -8178,7 +8165,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (err) { netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %s\n", mode, err, - ice_aq_str(hw->adminq.sq_last_status)); + libie_aq_str(hw->adminq.sq_last_status)); /* revert hw->evb_veb */ hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB); return err; @@ -8195,7 +8182,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, * @netdev: network interface device structure * @txqueue: Tx queue */ -static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) +void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_tx_ring *tx_ring = NULL; @@ -8234,16 +8221,18 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) if (tx_ring) { struct ice_hw *hw = &pf->hw; - u32 head, val = 0; + u32 head, intr = 0; head = FIELD_GET(QTX_COMM_HEAD_HEAD_M, rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue]))); /* Read interrupt register */ - val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); + intr = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n", vsi->vsi_num, txqueue, tx_ring->next_to_clean, - head, tx_ring->next_to_use, val); + head, tx_ring->next_to_use, intr); + + ice_prep_tx_hang_report(pf, tx_ring, vsi->vsi_num, head, intr); } pf->tx_timeout_last_recovery = jiffies; @@ -8277,11 +8266,16 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) * @np: net device to configure * @filter_dev: device on which filter is added * @cls_flower: offload data + * @ingress: if the rule is added to an ingress block + * + * Return: 0 if the flower was successfully added or deleted, + * 
negative error code otherwise. */ static int ice_setup_tc_cls_flower(struct ice_netdev_priv *np, struct net_device *filter_dev, - struct flow_cls_offload *cls_flower) + struct flow_cls_offload *cls_flower, + bool ingress) { struct ice_vsi *vsi = np->vsi; @@ -8290,7 +8284,7 @@ ice_setup_tc_cls_flower(struct ice_netdev_priv *np, switch (cls_flower->command) { case FLOW_CLS_REPLACE: - return ice_add_cls_flower(filter_dev, vsi, cls_flower); + return ice_add_cls_flower(filter_dev, vsi, cls_flower, ingress); case FLOW_CLS_DESTROY: return ice_del_cls_flower(vsi, cls_flower); default: @@ -8299,20 +8293,46 @@ ice_setup_tc_cls_flower(struct ice_netdev_priv *np, } /** - * ice_setup_tc_block_cb - callback handler registered for TC block + * ice_setup_tc_block_cb_ingress - callback handler for ingress TC block * @type: TC SETUP type * @type_data: TC flower offload data that contains user input * @cb_priv: netdev private data + * + * Return: 0 if the setup was successful, negative error code otherwise. */ static int -ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +ice_setup_tc_block_cb_ingress(enum tc_setup_type type, void *type_data, + void *cb_priv) { struct ice_netdev_priv *np = cb_priv; switch (type) { case TC_SETUP_CLSFLOWER: return ice_setup_tc_cls_flower(np, np->vsi->netdev, - type_data); + type_data, true); + default: + return -EOPNOTSUPP; + } +} + +/** + * ice_setup_tc_block_cb_egress - callback handler for egress TC block + * @type: TC SETUP type + * @type_data: TC flower offload data that contains user input + * @cb_priv: netdev private data + * + * Return: 0 if the setup was successful, negative error code otherwise. 
+ */ +static int +ice_setup_tc_block_cb_egress(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct ice_netdev_priv *np = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ice_setup_tc_cls_flower(np, np->vsi->netdev, + type_data, false); default: return -EOPNOTSUPP; } @@ -9065,7 +9085,7 @@ static int ice_create_q_channels(struct ice_vsi *vsi) list_add_tail(&ch->list, &vsi->ch_list); vsi->tc_map_vsi[i] = ch->ch_vsi; dev_dbg(ice_pf_to_dev(pf), - "successfully created channel: VSI %pK\n", ch->ch_vsi); + "successfully created channel: VSI %p\n", ch->ch_vsi); } return 0; @@ -9250,6 +9270,96 @@ exit: return ret; } +/** + * ice_cfg_txtime - configure Tx Time for the Tx ring + * @tx_ring: pointer to the Tx ring structure + * + * Return: 0 on success, negative value on failure. + */ +static int ice_cfg_txtime(struct ice_tx_ring *tx_ring) +{ + int err, timeout = 50; + struct ice_vsi *vsi; + struct device *dev; + struct ice_pf *pf; + u32 queue; + + if (!tx_ring) + return -EINVAL; + + vsi = tx_ring->vsi; + pf = vsi->back; + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + queue = tx_ring->q_index; + dev = ice_pf_to_dev(pf); + + /* Ignore return value, and always attempt to enable queue. */ + ice_qp_dis(vsi, queue); + + err = ice_qp_ena(vsi, queue); + if (err) + dev_err(dev, "Failed to enable Tx queue %d for TxTime configuration\n", + queue); + + clear_bit(ICE_CFG_BUSY, pf->state); + return err; +} + +/** + * ice_offload_txtime - set earliest TxTime first + * @netdev: network interface device structure + * @qopt_off: etf queue option offload from the skb to set + * + * Return: 0 on success, negative value on failure. 
+ */ +static int ice_offload_txtime(struct net_device *netdev, + void *qopt_off) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + struct tc_etf_qopt_offload *qopt; + struct ice_vsi *vsi = np->vsi; + struct ice_tx_ring *tx_ring; + int ret = 0; + + if (!ice_is_feature_supported(pf, ICE_F_TXTIME)) + return -EOPNOTSUPP; + + qopt = qopt_off; + if (!qopt_off || qopt->queue < 0 || qopt->queue >= vsi->num_txq) + return -EINVAL; + + if (qopt->enable) + set_bit(qopt->queue, pf->txtime_txqs); + else + clear_bit(qopt->queue, pf->txtime_txqs); + + if (netif_running(vsi->netdev)) { + tx_ring = vsi->tx_rings[qopt->queue]; + ret = ice_cfg_txtime(tx_ring); + if (ret) + goto err; + } + + netdev_info(netdev, "%s TxTime on queue: %i\n", + str_enable_disable(qopt->enable), qopt->queue); + return 0; + +err: + netdev_err(netdev, "Failed to %s TxTime on queue: %i\n", + str_enable_disable(qopt->enable), qopt->queue); + + if (qopt->enable) + clear_bit(qopt->queue, pf->txtime_txqs); + return ret; +} + static LIST_HEAD(ice_block_cb_list); static int @@ -9257,27 +9367,45 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { struct ice_netdev_priv *np = netdev_priv(netdev); + enum flow_block_binder_type binder_type; + struct iidc_rdma_core_dev_info *cdev; struct ice_pf *pf = np->vsi->back; + flow_setup_cb_t *flower_handler; bool locked = false; int err; switch (type) { case TC_SETUP_BLOCK: + binder_type = + ((struct flow_block_offload *)type_data)->binder_type; + + switch (binder_type) { + case FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS: + flower_handler = ice_setup_tc_block_cb_ingress; + break; + case FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS: + flower_handler = ice_setup_tc_block_cb_egress; + break; + default: + return -EOPNOTSUPP; + } + return flow_block_cb_setup_simple(type_data, &ice_block_cb_list, - ice_setup_tc_block_cb, - np, np, true); + flower_handler, + np, np, false); case TC_SETUP_QDISC_MQPRIO: if 
(ice_is_eswitch_mode_switchdev(pf)) { netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n"); return -EOPNOTSUPP; } - if (pf->adev) { + cdev = pf->cdev_info; + if (cdev && cdev->adev) { mutex_lock(&pf->adev_mutex); - device_lock(&pf->adev->dev); + device_lock(&cdev->adev->dev); locked = true; - if (pf->adev->dev.driver) { + if (cdev->adev->dev.driver) { netdev_err(netdev, "Cannot change qdisc when RDMA is active\n"); err = -EBUSY; goto adev_unlock; @@ -9291,10 +9419,12 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, adev_unlock: if (locked) { - device_unlock(&pf->adev->dev); + device_unlock(&cdev->adev->dev); mutex_unlock(&pf->adev_mutex); } return err; + case TC_SETUP_QDISC_ETF: + return ice_offload_txtime(netdev, type_data); default: return -EOPNOTSUPP; } @@ -9327,7 +9457,7 @@ ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data, case TC_SETUP_CLSFLOWER: return ice_setup_tc_cls_flower(np, priv->netdev, (struct flow_cls_offload *) - type_data); + type_data, false); default: return -EOPNOTSUPP; } @@ -9430,8 +9560,7 @@ ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, */ int ice_open(struct net_device *netdev) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); if (ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't open net device while reset is in progress"); @@ -9634,7 +9763,6 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_set_tx_maxrate = ice_set_tx_maxrate, - .ndo_eth_ioctl = ice_eth_ioctl, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, @@ -9658,4 +9786,6 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_bpf = ice_xdp, .ndo_xdp_xmit = ice_xdp_xmit, .ndo_xsk_wakeup = ice_xsk_wakeup, + .ndo_hwtstamp_get = ice_ptp_hwtstamp_get, + 
.ndo_hwtstamp_set = ice_ptp_hwtstamp_set, }; |
