Diffstat (limited to 'drivers/infiniband/hw/bnxt_re/main.c'):

 drivers/infiniband/hw/bnxt_re/main.c | 1074 +++++++++++++++++++++++++--------
 1 file changed, 776 insertions(+), 298 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 54b4d2f3a5d8..4659a2f73364 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -67,6 +67,7 @@ #include <rdma/bnxt_re-abi.h> #include "bnxt.h" #include "hw_counters.h" +#include "debugfs.h" static char version[] = BNXT_RE_DESC "\n"; @@ -78,16 +79,12 @@ MODULE_LICENSE("Dual BSD/GPL"); /* globals */ static DEFINE_MUTEX(bnxt_re_mutex); -static void bnxt_re_stop_irq(void *handle); -static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev); -static int bnxt_re_netdev_event(struct notifier_block *notifier, - unsigned long event, void *ptr); -static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev); -static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev); static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev); static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, u32 *offset); +static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp, + u8 port_num, enum ib_event_type event); static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *cctx; @@ -129,18 +126,20 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) } } -static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) +static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *cctx; cctx = rdev->chip_ctx; - cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? - mode : BNXT_QPLIB_WQE_MODE_STATIC; + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? + BNXT_QPLIB_WQE_MODE_VARIABLE : BNXT_QPLIB_WQE_MODE_STATIC; if (bnxt_re_hwrm_qcaps(rdev)) dev_err(rdev_to_dev(rdev), "Failed to query hwrm qcaps\n"); - if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) + if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) { cctx->modes.toggle_bits |= BNXT_QPLIB_CQ_TOGGLE_BIT; + cctx->modes.toggle_bits |= BNXT_QPLIB_SRQ_TOGGLE_BIT; + } } static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) @@ -149,6 +148,10 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) if (!rdev->chip_ctx) return; + + kfree(rdev->dev_attr); + rdev->dev_attr = NULL; + chip_ctx = rdev->chip_ctx; rdev->chip_ctx = NULL; rdev->rcfw.res = NULL; @@ -158,14 +161,15 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) kfree(chip_ctx); } -static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) +static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; - int rc; + int rc = -ENOMEM; en_dev = rdev->en_dev; + rdev->qplib_res.pdev = en_dev->pdev; chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL); if (!chip_ctx) return -ENOMEM; @@ -177,20 +181,31 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; - rdev->qplib_res.dattr = &rdev->dev_attr; + rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL); + if (!rdev->dev_attr) + goto free_chip_ctx; + rdev->qplib_res.dattr = rdev->dev_attr; rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); + rdev->qplib_res.en_dev = en_dev; - bnxt_re_set_drv_mode(rdev, wqe_mode); + bnxt_re_set_drv_mode(rdev); bnxt_re_set_db_offset(rdev); rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); if (rc) - return rc; + goto free_dev_attr; if (bnxt_qplib_determine_atomics(en_dev->pdev)) ibdev_info(&rdev->ibdev, "platform doesn't support global atomics."); return 0; +free_dev_attr: + 
kfree(rdev->dev_attr); + rdev->dev_attr = NULL; +free_chip_ctx: + kfree(rdev->chip_ctx); + rdev->chip_ctx = NULL; + return rc; } /* SR-IOV helper functions */ @@ -212,7 +227,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) struct bnxt_qplib_ctx *ctx; int i; - attr = &rdev->dev_attr; + attr = rdev->dev_attr; ctx = &rdev->qplib_ctx; ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, @@ -226,7 +241,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) rdev->qplib_ctx.tqm_ctx.qcount[i] = - rdev->dev_attr.tqm_alloc_reqs[i]; + rdev->dev_attr->tqm_alloc_reqs[i]; } static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) @@ -280,33 +295,163 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) { + /* + * Use the total VF count since the actual VF count may not be + * available at this point. + */ rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev); - if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { - bnxt_re_set_resource_limits(rdev); - bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx); + if (!rdev->num_vfs) + return; + + bnxt_re_set_resource_limits(rdev); + bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, + &rdev->qplib_ctx); +} + +struct bnxt_re_dcb_work { + struct work_struct work; + struct bnxt_re_dev *rdev; + struct hwrm_async_event_cmpl cmpl; +}; + +static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp) +{ + return qp->ib_qp.qp_type == IB_QPT_GSI; +} + +static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev) +{ + struct bnxt_re_qp *qp; + + mutex_lock(&rdev->qp_lock); + list_for_each_entry(qp, &rdev->qp_list, list) { + if (bnxt_re_is_qp1_qp(qp)) { + mutex_unlock(&rdev->qp_lock); + return qp; + } } + mutex_unlock(&rdev->qp_lock); + return NULL; } -static void bnxt_re_shutdown(struct auxiliary_device *adev) +static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev) +{ + struct bnxt_re_qp *qp; + + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return 0; + + qp = bnxt_re_get_qp1_qp(rdev); + if (!qp) + return 0; + + qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP; + qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp; + + return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); +} + +static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev) +{ + rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq"); +} + +static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev) +{ + if (!rdev->dcb_wq) + return; + destroy_workqueue(rdev->dcb_wq); +} + +static void bnxt_re_dcb_wq_task(struct work_struct *work) +{ + struct bnxt_re_dcb_work *dcb_work = + container_of(work, struct bnxt_re_dcb_work, work); + struct bnxt_re_dev *rdev = dcb_work->rdev; + struct bnxt_qplib_cc_param *cc_param; + int rc; + + if (!rdev) + goto free_dcb; + + cc_param = &rdev->cc_param; + rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param); + if (rc) { + ibdev_dbg(&rdev->ibdev, "Failed to query ccparam rc:%d", rc); + goto free_dcb; + } + if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) { + cc_param->qp1_tos_dscp = cc_param->tos_dscp; + rc = bnxt_re_update_qp1_tos_dscp(rdev); + if (rc) { + ibdev_dbg(&rdev->ibdev, "%s: Failed to modify QP1 rc:%d", + __func__, rc); + goto free_dcb; + } + } + +free_dcb: + kfree(dcb_work); +} + +static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl) { - 
struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev); + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); + struct bnxt_re_dcb_work *dcb_work; + struct bnxt_re_dev *rdev; + u32 data1, data2; + u16 event_id; + rdev = en_info->rdev; if (!rdev) return; - ib_unregister_device(&rdev->ibdev); - bnxt_re_dev_uninit(rdev); + + event_id = le16_to_cpu(cmpl->event_id); + data1 = le32_to_cpu(cmpl->event_data1); + data2 = le32_to_cpu(cmpl->event_data2); + + ibdev_dbg(&rdev->ibdev, "Async event_id = %d data1 = %d data2 = %d", + event_id, data1, data2); + + switch (event_id) { + case ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: + dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC); + if (!dcb_work) + break; + + dcb_work->rdev = rdev; + memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl)); + INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task); + queue_work(rdev->dcb_wq, &dcb_work->work); + break; + default: + break; + } } -static void bnxt_re_stop_irq(void *handle) +static void bnxt_re_stop_irq(void *handle, bool reset) { - struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle; - struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw; + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); + struct bnxt_qplib_rcfw *rcfw; + struct bnxt_re_dev *rdev; struct bnxt_qplib_nq *nq; int indx; - for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) { - nq = &rdev->nq[indx - 1]; + rdev = en_info->rdev; + if (!rdev) + return; + rcfw = &rdev->rcfw; + + if (reset) { + set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); + set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); + wake_up_all(&rdev->rcfw.cmdq.waitq); + bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, + IB_EVENT_DEVICE_FATAL); + } + + for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) { + nq = &rdev->nqr->nq[indx - 1]; bnxt_qplib_nq_stop_irq(nq, false); } @@ -315,12 +460,18 @@ static void bnxt_re_stop_irq(void *handle) static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) { - struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle; - struct bnxt_msix_entry *msix_ent = rdev->en_dev->msix_entries; - struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw; + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); + struct bnxt_msix_entry *msix_ent; + struct bnxt_qplib_rcfw *rcfw; + struct bnxt_re_dev *rdev; struct bnxt_qplib_nq *nq; int indx, rc; + rdev = en_info->rdev; + if (!rdev) + return; + msix_ent = rdev->nqr->msix_entries; + rcfw = &rdev->rcfw; if (!ent) { /* Not setting the f/w timeout bit in rcfw. * During the driver unload the first command @@ -334,8 +485,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) /* Vectors may change after restart, so update with new vectors * in device sctructure. 
*/ - for (indx = 0; indx < rdev->num_msix; indx++) - rdev->en_dev->msix_entries[indx].vector = ent[indx].vector; + for (indx = 0; indx < rdev->nqr->num_msix; indx++) + rdev->nqr->msix_entries[indx].vector = ent[indx].vector; rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, false); @@ -343,8 +494,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n"); return; } - for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) { - nq = &rdev->nq[indx - 1]; + for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) { + nq = &rdev->nqr->nq[indx - 1]; rc = bnxt_qplib_nq_start_irq(nq, indx - 1, msix_ent[indx].vector, false); if (rc) { @@ -356,6 +507,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) } static struct bnxt_ulp_ops bnxt_re_ulp_ops = { + .ulp_async_notifier = bnxt_re_async_notifier, .ulp_irq_stop = bnxt_re_stop_irq, .ulp_irq_restart = bnxt_re_start_irq }; @@ -365,14 +517,9 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = { static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev; - int rc; en_dev = rdev->en_dev; - - rc = bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev); - if (!rc) - rdev->qplib_res.pdev = rdev->en_dev->pdev; - return rc; + return bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev->adev); } static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd) @@ -423,6 +570,7 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) struct hwrm_func_qcaps_input req = {}; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_fw_msg fw_msg = {}; + u32 flags_ext2; int rc; cctx = rdev->chip_ctx; @@ -436,14 +584,15 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) return rc; cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE; - cctx->modes.dbr_pacing = - le32_to_cpu(resp.flags_ext2) & - FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED; + flags_ext2 = le32_to_cpu(resp.flags_ext2); + cctx->modes.dbr_pacing = flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED || + flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED; return 0; } static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev) { + struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; struct hwrm_func_dbr_pacing_qcfg_output resp = {}; struct hwrm_func_dbr_pacing_qcfg_input req = {}; struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -465,6 +614,13 @@ static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev) cctx->dbr_stat_db_fifo = le32_to_cpu(resp.dbr_stat_db_fifo_reg) & ~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK; + + pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth); + if (!pacing_data->fifo_max_depth) + pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx); + pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask); + pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift; + return 0; } @@ -479,24 +635,55 @@ static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev) pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; } -static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) +static u32 __get_fifo_occupancy(struct bnxt_re_dev *rdev) { + struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; u32 read_val, fifo_occup; + read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); + fifo_occup = pacing_data->fifo_max_depth - 
+ ((read_val & pacing_data->fifo_room_mask) >> + pacing_data->fifo_room_shift); + return fifo_occup; +} + +static bool is_dbr_fifo_full(struct bnxt_re_dev *rdev) +{ + u32 max_occup, fifo_occup; + + fifo_occup = __get_fifo_occupancy(rdev); + max_occup = BNXT_RE_MAX_FIFO_DEPTH(rdev->chip_ctx) - 1; + if (fifo_occup == max_occup) + return true; + + return false; +} + +static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; + u32 retry_fifo_check = 1000; + u32 fifo_occup; + /* loop shouldn't run infintely as the occupancy usually goes * below pacing algo threshold as soon as pacing kicks in. */ while (1) { - read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); - fifo_occup = BNXT_RE_MAX_FIFO_DEPTH - - ((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >> - BNXT_RE_DB_FIFO_ROOM_SHIFT); + fifo_occup = __get_fifo_occupancy(rdev); /* Fifo occupancy cannot be greater the MAX FIFO depth */ - if (fifo_occup > BNXT_RE_MAX_FIFO_DEPTH) + if (fifo_occup > pacing_data->fifo_max_depth) break; - if (fifo_occup < rdev->qplib_res.pacing_data->pacing_th) + if (fifo_occup < pacing_data->pacing_th) + break; + if (!retry_fifo_check--) { + dev_info_once(rdev_to_dev(rdev), + "%s: fifo_occup = 0x%xfifo_max_depth = 0x%x pacing_th = 0x%x\n", + __func__, fifo_occup, pacing_data->fifo_max_depth, + pacing_data->pacing_th); break; + } + } } @@ -546,16 +733,13 @@ static void bnxt_re_pacing_timer_exp(struct work_struct *work) struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, dbq_pacing_work.work); struct bnxt_qplib_db_pacing_data *pacing_data; - u32 read_val, fifo_occup; + u32 fifo_occup; if (!mutex_trylock(&rdev->pacing.dbq_lock)) return; pacing_data = rdev->qplib_res.pacing_data; - read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); - fifo_occup = BNXT_RE_MAX_FIFO_DEPTH - - ((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >> - BNXT_RE_DB_FIFO_ROOM_SHIFT); + fifo_occup = __get_fifo_occupancy(rdev); if (fifo_occup > pacing_data->pacing_th) goto restart_timer; @@ -594,7 +778,7 @@ void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev) * Increase the alarm_th to max so that other user lib instances do not * keep alerting the driver. 
*/ - pacing_data->alarm_th = BNXT_RE_MAX_FIFO_DEPTH; + pacing_data->alarm_th = pacing_data->fifo_max_depth; pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING; cancel_work_sync(&rdev->dbq_fifo_check_work); schedule_work(&rdev->dbq_fifo_check_work); @@ -603,9 +787,6 @@ void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev) static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) { - if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) - return -EIO; - /* Allocate a page for app use */ rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL); if (!rdev->pacing.dbr_page) @@ -614,6 +795,12 @@ static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE); rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page; + if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) { + free_page((u64)rdev->pacing.dbr_page); + rdev->pacing.dbr_page = NULL; + return -EIO; + } + /* MAP HW window 2 for reading db fifo depth */ writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK, rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4); @@ -623,13 +810,16 @@ static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) rdev->pacing.dbr_bar_addr = pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off; + if (is_dbr_fifo_full(rdev)) { + free_page((u64)rdev->pacing.dbr_page); + rdev->pacing.dbr_page = NULL; + return -EIO; + } + rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD; rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME; rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION; rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing; - rdev->qplib_res.pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH; - rdev->qplib_res.pacing_data->fifo_room_mask = BNXT_RE_DB_FIFO_ROOM_MASK; - rdev->qplib_res.pacing_data->fifo_room_shift = BNXT_RE_DB_FIFO_ROOM_SHIFT; rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off; bnxt_re_set_default_pacing_data(rdev); /* Initialize worker for DBR Pacing */ @@ -779,17 +969,6 @@ static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext) } /* Device */ - -static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev) -{ - struct ib_device *ibdev = - ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE); - if (!ibdev) - return NULL; - - return container_of(ibdev, struct bnxt_re_dev, ibdev); -} - static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { @@ -820,6 +999,253 @@ static const struct attribute_group bnxt_re_dev_attr_group = { .attrs = bnxt_re_attributes, }; +static int bnxt_re_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr) +{ + struct bnxt_qplib_hwq *mr_hwq; + struct nlattr *table_attr; + struct bnxt_re_mr *mr; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + return -EMSGSIZE; + + mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr); + mr_hwq = &mr->qplib_mr.hwq; + + if (rdma_nl_put_driver_u32(msg, "page_size", + mr_hwq->qe_ppg * mr_hwq->element_size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "max_elements", mr_hwq->max_elements)) + goto err; + if (rdma_nl_put_driver_u32(msg, "element_size", mr_hwq->element_size)) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "hwq", (unsigned long)mr_hwq)) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "va", mr->qplib_mr.va)) + goto err; + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +static int 
bnxt_re_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) +{ + struct bnxt_re_dev *rdev; + struct bnxt_re_mr *mr; + int err, len; + void *data; + + mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr); + rdev = mr->rdev; + + err = bnxt_re_read_context_allowed(rdev); + if (err) + return err; + + len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 : + BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5; + data = kzalloc(len, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_MRW, + mr->qplib_mr.lkey, len, data); + if (!err) + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); + + kfree(data); + return err; +} + +static int bnxt_re_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) +{ + struct bnxt_qplib_hwq *cq_hwq; + struct nlattr *table_attr; + struct bnxt_re_cq *cq; + + cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); + cq_hwq = &cq->qplib_cq.hwq; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + return -EMSGSIZE; + + if (rdma_nl_put_driver_u32(msg, "cq_depth", cq_hwq->depth)) + goto err; + if (rdma_nl_put_driver_u32(msg, "max_elements", cq_hwq->max_elements)) + goto err; + if (rdma_nl_put_driver_u32(msg, "element_size", cq_hwq->element_size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "max_wqe", cq->qplib_cq.max_wqe)) + goto err; + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +static int bnxt_re_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) +{ + struct bnxt_re_dev *rdev; + struct bnxt_re_cq *cq; + int err, len; + void *data; + + cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); + rdev = cq->rdev; + + err = bnxt_re_read_context_allowed(rdev); + if (err) + return err; + + len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? 
BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 : + BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5; + data = kzalloc(len, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = bnxt_qplib_read_context(&rdev->rcfw, + CMDQ_READ_CONTEXT_TYPE_CQ, + cq->qplib_cq.id, len, data); + if (!err) + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); + + kfree(data); + return err; +} + +static int bnxt_re_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp) +{ + struct bnxt_qplib_qp *qplib_qp; + struct nlattr *table_attr; + struct bnxt_re_qp *qp; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + return -EMSGSIZE; + + qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); + qplib_qp = &qp->qplib_qp; + + if (rdma_nl_put_driver_u32(msg, "sq_max_wqe", qplib_qp->sq.max_wqe)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sq_max_sge", qplib_qp->sq.max_sge)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sq_wqe_size", qplib_qp->sq.wqe_size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sq_swq_start", qplib_qp->sq.swq_start)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sq_swq_last", qplib_qp->sq.swq_last)) + goto err; + if (rdma_nl_put_driver_u32(msg, "rq_max_wqe", qplib_qp->rq.max_wqe)) + goto err; + if (rdma_nl_put_driver_u32(msg, "rq_max_sge", qplib_qp->rq.max_sge)) + goto err; + if (rdma_nl_put_driver_u32(msg, "rq_wqe_size", qplib_qp->rq.wqe_size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "rq_swq_start", qplib_qp->rq.swq_start)) + goto err; + if (rdma_nl_put_driver_u32(msg, "rq_swq_last", qplib_qp->rq.swq_last)) + goto err; + if (rdma_nl_put_driver_u32(msg, "timeout", qplib_qp->timeout)) + goto err; + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +static int bnxt_re_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) +{ + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibqp->device, ibdev); + int err, len; + void *data; + + err = bnxt_re_read_context_allowed(rdev); + if (err) + return err; + + len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 : + BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5; + data = kzalloc(len, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_QPC, + ibqp->qp_num, len, data); + if (!err) + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); + + kfree(data); + return err; +} + +static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq) +{ + struct nlattr *table_attr; + struct bnxt_re_srq *srq; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + return -EMSGSIZE; + + srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); + + if (rdma_nl_put_driver_u32_hex(msg, "wqe_size", srq->qplib_srq.wqe_size)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "max_wqe", srq->qplib_srq.max_wqe)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge)) + goto err; + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +static int bnxt_re_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq) +{ + struct bnxt_re_dev *rdev; + struct bnxt_re_srq *srq; + int err, len; + void *data; + + srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); + rdev = srq->rdev; + + err = bnxt_re_read_context_allowed(rdev); + if (err) + return err; + + len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? 
BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 : + BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5; + + data = kzalloc(len, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_SRQ, + srq->qplib_srq.id, len, data); + if (!err) + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); + + kfree(data); + return err; +} + static const struct ib_device_ops bnxt_re_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_BNXT_RE, @@ -861,6 +1287,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .post_srq_recv = bnxt_re_post_srq_recv, .query_ah = bnxt_re_query_ah, .query_device = bnxt_re_query_device, + .modify_device = bnxt_re_modify_device, .query_pkey = bnxt_re_query_pkey, .query_port = bnxt_re_query_port, .query_qp = bnxt_re_query_qp, @@ -877,6 +1304,17 @@ static const struct ib_device_ops bnxt_re_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx), }; +static const struct ib_device_ops restrack_ops = { + .fill_res_cq_entry = bnxt_re_fill_res_cq_entry, + .fill_res_cq_entry_raw = bnxt_re_fill_res_cq_entry_raw, + .fill_res_qp_entry = bnxt_re_fill_res_qp_entry, + .fill_res_qp_entry_raw = bnxt_re_fill_res_qp_entry_raw, + .fill_res_mr_entry = bnxt_re_fill_res_mr_entry, + .fill_res_mr_entry_raw = bnxt_re_fill_res_mr_entry_raw, + .fill_res_srq_entry = bnxt_re_fill_res_srq_entry, + .fill_res_srq_entry_raw = bnxt_re_fill_res_srq_entry_raw, +}; + static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; @@ -890,7 +1328,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); - ibdev->num_comp_vectors = rdev->num_msix - 1; + ibdev->num_comp_vectors = rdev->nqr->num_msix - 1; ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; @@ -898,6 +1336,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->driver_def = bnxt_re_uapi_defs; ib_set_device_ops(ibdev, &bnxt_re_dev_ops); + ib_set_device_ops(ibdev, &restrack_ops); ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); if (ret) return ret; @@ -907,7 +1346,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev); } -static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv, +static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev, struct bnxt_en_dev *en_dev) { struct bnxt_re_dev *rdev; @@ -920,9 +1359,9 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv, return NULL; } /* Default values */ - rdev->nb.notifier_call = NULL; rdev->netdev = en_dev->net; rdev->en_dev = en_dev; + rdev->adev = adev; rdev->id = rdev->en_dev->pdev->devfn; INIT_LIST_HEAD(&rdev->qp_list); mutex_init(&rdev->qp_lock); @@ -936,6 +1375,15 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv, atomic_set(&rdev->stats.res.pd_count, 0); rdev->cosq[0] = 0xFFFF; rdev->cosq[1] = 0xFFFF; + rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME; + if (bnxt_re_chip_gen_p7(en_dev->chip_num)) { + rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7; + rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7; + } else { + rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5; + rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5; + } + rdev->cq_coalescing.en_ring_idle_mode = 
BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE; return rdev; } @@ -975,12 +1423,15 @@ static int bnxt_re_handle_unaffi_async_event(struct creq_func_event static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, struct bnxt_re_qp *qp) { - struct bnxt_re_srq *srq = container_of(qp->qplib_qp.srq, struct bnxt_re_srq, - qplib_srq); struct creq_qp_error_notification *err_event; + struct bnxt_re_srq *srq = NULL; struct ib_event event = {}; unsigned int flags; + if (qp->qplib_qp.srq) + srq = container_of(qp->qplib_qp.srq, struct bnxt_re_srq, + qplib_srq); + if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && rdma_is_kernel_res(&qp->ib_qp.res)) { flags = bnxt_re_lock_cqs(qp); @@ -1208,15 +1659,9 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, { struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq, qplib_cq); - u32 *cq_ptr; - if (cq->ib_cq.comp_handler) { - if (cq->uctx_cq_page) { - cq_ptr = (u32 *)cq->uctx_cq_page; - *cq_ptr = cq->qplib_cq.toggle; - } + if (cq->ib_cq.comp_handler) (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context); - } return 0; } @@ -1225,8 +1670,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { int i; - for (i = 1; i < rdev->num_msix; i++) - bnxt_qplib_disable_nq(&rdev->nq[i - 1]); + for (i = 1; i < rdev->nqr->num_msix; i++) + bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]); if (rdev->qplib_res.rcfw) bnxt_qplib_cleanup_res(&rdev->qplib_res); @@ -1240,10 +1685,12 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) bnxt_qplib_init_res(&rdev->qplib_res); - for (i = 1; i < rdev->num_msix ; i++) { - db_offt = rdev->en_dev->msix_entries[i].db_offset; - rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], - i - 1, rdev->en_dev->msix_entries[i].vector, + mutex_init(&rdev->nqr->load_lock); + + for (i = 1; i < rdev->nqr->num_msix ; i++) { + db_offt = rdev->nqr->msix_entries[i].db_offset; + rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1], + i - 1, rdev->nqr->msix_entries[i].vector, db_offt, &bnxt_re_cqn_handler, &bnxt_re_srqn_handler); if (rc) { @@ -1256,20 +1703,22 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) return 0; fail: for (i = num_vec_enabled; i >= 0; i--) - bnxt_qplib_disable_nq(&rdev->nq[i]); + bnxt_qplib_disable_nq(&rdev->nqr->nq[i]); return rc; } static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) { + struct bnxt_qplib_nq *nq; u8 type; int i; - for (i = 0; i < rdev->num_msix - 1; i++) { + for (i = 0; i < rdev->nqr->num_msix - 1; i++) { type = bnxt_qplib_get_ring_type(rdev->chip_ctx); - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); - bnxt_qplib_free_nq(&rdev->nq[i]); - rdev->nq[i].res = NULL; + nq = &rdev->nqr->nq[i]; + bnxt_re_net_ring_free(rdev, nq->ring_id, type); + bnxt_qplib_free_nq(nq); + nq->res = NULL; } } @@ -1296,12 +1745,11 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); if (rc) goto fail; - rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev, - rdev->netdev, &rdev->dev_attr); + rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->netdev); if (rc) goto fail; @@ -1311,12 +1759,12 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) if (rc) goto dealloc_res; - for (i = 0; i < rdev->num_msix - 1; i++) { + for (i = 0; i < rdev->nqr->num_msix - 1; i++) { struct bnxt_qplib_nq *nq; - nq = &rdev->nq[i]; + nq = &rdev->nqr->nq[i]; 
nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; - rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]); + rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq); if (rc) { ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x", i, rc); @@ -1324,17 +1772,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) } type = bnxt_qplib_get_ring_type(rdev->chip_ctx); rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr; - rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count; + rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count; rattr.type = type; rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1; - rattr.lrid = rdev->en_dev->msix_entries[i + 1].ring_idx; + rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx; rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate NQ fw id with rc = 0x%x", rc); - bnxt_qplib_free_nq(&rdev->nq[i]); + bnxt_qplib_free_nq(nq); goto free_nq; } num_vec_created++; @@ -1343,8 +1791,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) free_nq: for (i = num_vec_created - 1; i >= 0; i--) { type = bnxt_qplib_get_ring_type(rdev->chip_ctx); - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); - bnxt_qplib_free_nq(&rdev->nq[i]); + bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type); + bnxt_qplib_free_nq(&rdev->nqr->nq[i]); } bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &rdev->dpi_privileged); @@ -1384,11 +1832,8 @@ static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev, static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) { - int mask = IB_QP_STATE; - struct ib_qp_attr qp_attr; struct bnxt_re_qp *qp; - qp_attr.qp_state = IB_QPS_ERR; mutex_lock(&rdev->qp_lock); list_for_each_entry(qp, &rdev->qp_list, list) { /* Modify the state of all QPs except QP1/Shadow QP */ @@ -1396,12 +1841,9 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) if (qp->qplib_qp.state != CMDQ_MODIFY_QP_NEW_STATE_RESET && qp->qplib_qp.state != - CMDQ_MODIFY_QP_NEW_STATE_ERR) { + CMDQ_MODIFY_QP_NEW_STATE_ERR) bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp, 1, IB_EVENT_QP_FATAL); - bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask, - NULL); - } } } mutex_unlock(&rdev->qp_lock); @@ -1482,6 +1924,26 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } +static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev) +{ + if (rdev->is_virtfn) + return; + + memset(&rdev->event_bitmap, 0, sizeof(rdev->event_bitmap)); + bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap, + ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); +} + +static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev) +{ + if (rdev->is_virtfn) + return; + + rdev->event_bitmap |= (1 << ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); + bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap, + ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); +} + static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -1539,11 +2001,31 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) return rc; } -static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev) +static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev) +{ + rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL); + if (!rdev->nqr) + return -ENOMEM; + + return 0; +} + +static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev) +{ + kfree(rdev->nqr); + rdev->nqr = NULL; +} + +static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) { u8 type; int rc; + 
bnxt_re_debugfs_rem_pdev(rdev); + + bnxt_re_net_unregister_async_event(rdev); + bnxt_re_uninit_dcb_wq(rdev); + if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) cancel_delayed_work_sync(&rdev->worker); @@ -1566,14 +2048,17 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev) bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } - rdev->num_msix = 0; + rdev->nqr->num_msix = 0; if (rdev->pacing.dbr_pacing) bnxt_re_deinitialize_dbr_pacing(rdev); + bnxt_re_free_nqr_mem(rdev); bnxt_re_destroy_chip_ctx(rdev); - if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) - bnxt_unregister_dev(rdev->en_dev); + if (op_type == BNXT_RE_COMPLETE_REMOVE) { + if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) + bnxt_unregister_dev(rdev->en_dev); + } } /* worker thread for polling periodic events. Now used for QoS programming*/ @@ -1586,7 +2071,7 @@ static void bnxt_re_worker(struct work_struct *work) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } -static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) +static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) { struct bnxt_re_ring_attr rattr = {}; struct bnxt_qplib_creq_ctx *creq; @@ -1595,16 +2080,29 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) u8 type; int rc; - /* Registered a new RoCE device instance to netdev */ - rc = bnxt_re_register_netdev(rdev); - if (rc) { + if (op_type == BNXT_RE_COMPLETE_INIT) { + /* Registered a new RoCE device instance to netdev */ + rc = bnxt_re_register_netdev(rdev); + if (rc) { + ibdev_err(&rdev->ibdev, + "Failed to register with netedev: %#x\n", rc); + return -EINVAL; + } + } + set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); + + if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) { ibdev_err(&rdev->ibdev, - "Failed to register with netedev: %#x\n", rc); + "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", + rdev->en_dev->ulp_tbl->msix_requested); + bnxt_unregister_dev(rdev->en_dev); + clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); return -EINVAL; } - set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); + ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", + rdev->en_dev->ulp_tbl->msix_requested); - rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); + rc = bnxt_re_setup_chip_ctx(rdev); if (rc) { bnxt_unregister_dev(rdev->en_dev); clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); @@ -1612,27 +2110,27 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) return -EINVAL; } + rc = bnxt_re_alloc_nqr_mem(rdev); + if (rc) { + bnxt_re_destroy_chip_ctx(rdev); + bnxt_unregister_dev(rdev->en_dev); + clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); + return rc; + } + rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested; + memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries, + sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix); + /* Check whether VF or PF */ bnxt_re_get_sriov_func_type(rdev); - if (!rdev->en_dev->ulp_tbl->msix_requested) { - ibdev_err(&rdev->ibdev, - "Failed to get MSI-X vectors: %#x\n", rc); - rc = -EINVAL; - goto fail; - } - ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", - rdev->en_dev->ulp_tbl->msix_requested); - rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested; - bnxt_re_query_hwrm_intf_version(rdev); /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx, - 
BNXT_RE_MAX_QPC_COUNT); + &rdev->qplib_ctx); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate RCFW Channel: %#x\n", rc); @@ -1646,14 +2144,14 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) rattr.type = type; rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1; - rattr.lrid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; + rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); goto free_rcfw; } - db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset; - vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector; + db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset; + vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector; rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vid, db_offt, &bnxt_re_aeq_handler); @@ -1673,7 +2171,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) rdev->pacing.dbr_pacing = false; } } - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); if (rc) goto disable_rcfw; @@ -1722,6 +2220,11 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags); if (!rdev->is_virtfn) { + /* Query f/w defaults of CC params */ + rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param); + if (rc) + ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n"); + rc = bnxt_re_setup_qos(rdev); if (rc) ibdev_info(&rdev->ibdev, @@ -1730,13 +2233,18 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker); set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags); schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); - /* - * Use the total VF count since the actual VF count may not be - * available at this point. - */ - bnxt_re_vf_res_config(rdev); + + if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)) + bnxt_re_vf_res_config(rdev); } hash_init(rdev->cq_hash); + if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) + hash_init(rdev->srq_hash); + + bnxt_re_debugfs_add_pdev(rdev); + + bnxt_re_init_dcb_wq(rdev); + bnxt_re_net_register_async_event(rdev); return 0; free_sctx: @@ -1751,50 +2259,11 @@ free_ring: free_rcfw: bnxt_qplib_free_rcfw_channel(&rdev->rcfw); fail: - bnxt_re_dev_uninit(rdev); + bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); return rc; } -static int bnxt_re_add_device(struct auxiliary_device *adev, u8 wqe_mode) -{ - struct bnxt_aux_priv *aux_priv = - container_of(adev, struct bnxt_aux_priv, aux_dev); - struct bnxt_en_dev *en_dev; - struct bnxt_re_dev *rdev; - int rc; - - /* en_dev should never be NULL as long as adev and aux_dev are valid. 
*/ - en_dev = aux_priv->edev; - - rdev = bnxt_re_dev_add(aux_priv, en_dev); - if (!rdev || !rdev_to_dev(rdev)) { - rc = -ENOMEM; - goto exit; - } - - rc = bnxt_re_dev_init(rdev, wqe_mode); - if (rc) - goto re_dev_dealloc; - - rc = bnxt_re_ib_init(rdev); - if (rc) { - pr_err("Failed to register with IB: %s", - aux_priv->aux_dev.name); - goto re_dev_uninit; - } - auxiliary_set_drvdata(adev, rdev); - - return 0; - -re_dev_uninit: - bnxt_re_dev_uninit(rdev); -re_dev_dealloc: - ib_dealloc_device(&rdev->ibdev); -exit: - return rc; -} - static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) { struct bnxt_qplib_cc_param cc_param = {}; @@ -1809,142 +2278,139 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) if (enable) { cc_param.enable = 1; - cc_param.cc_mode = CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE; + cc_param.tos_ecn = 1; } - cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE | - CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | + cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param)) ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable); } -/* - * "Notifier chain callback can be invoked for the same chain from - * different CPUs at the same time". - * - * For cases when the netdev is already present, our call to the - * register_netdevice_notifier() will actually get the rtnl_lock() - * before sending NETDEV_REGISTER and (if up) NETDEV_UP - * events. - * - * But for cases when the netdev is not already present, the notifier - * chain is subjected to be invoked from different CPUs simultaneously. - * - * This is protected by the netdev_mutex. - */ -static int bnxt_re_netdev_event(struct notifier_block *notifier, - unsigned long event, void *ptr) +static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev, + struct bnxt_re_en_dev_info *en_info, + struct auxiliary_device *adev) { - struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr); + /* Before updating the rdev pointer in bnxt_re_en_dev_info structure, + * take the rtnl lock to avoid accessing invalid rdev pointer from + * L2 ULP callbacks. This is applicable in all the places where rdev + * pointer is updated in bnxt_re_en_dev_info. + */ + rtnl_lock(); + en_info->rdev = rdev; + rtnl_unlock(); +} + +static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type) +{ + struct bnxt_aux_priv *aux_priv = + container_of(adev, struct bnxt_aux_priv, aux_dev); + struct bnxt_re_en_dev_info *en_info; + struct bnxt_en_dev *en_dev; struct bnxt_re_dev *rdev; + int rc; + + en_info = auxiliary_get_drvdata(adev); + en_dev = en_info->en_dev; - real_dev = rdma_vlan_dev_real_dev(netdev); - if (!real_dev) - real_dev = netdev; - if (real_dev != netdev) + rdev = bnxt_re_dev_add(adev, en_dev); + if (!rdev || !rdev_to_dev(rdev)) { + rc = -ENOMEM; goto exit; + } - rdev = bnxt_re_from_netdev(real_dev); - if (!rdev) - return NOTIFY_DONE; + bnxt_re_update_en_info_rdev(rdev, en_info, adev); + rc = bnxt_re_dev_init(rdev, op_type); + if (rc) + goto re_dev_dealloc; - switch (event) { - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_CHANGE: - bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, - netif_carrier_ok(real_dev) ? 
- IB_EVENT_PORT_ACTIVE : - IB_EVENT_PORT_ERR); - break; - default: - break; + rc = bnxt_re_ib_init(rdev); + if (rc) { + pr_err("Failed to register with IB: %s", + aux_priv->aux_dev.name); + goto re_dev_uninit; } - ib_device_put(&rdev->ibdev); + + bnxt_re_setup_cc(rdev, true); + + return 0; + +re_dev_uninit: + bnxt_re_update_en_info_rdev(NULL, en_info, adev); + bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); +re_dev_dealloc: + ib_dealloc_device(&rdev->ibdev); exit: - return NOTIFY_DONE; + return rc; } #define BNXT_ADEV_NAME "bnxt_en" -static void bnxt_re_remove(struct auxiliary_device *adev) +static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, + struct auxiliary_device *aux_dev) { - struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev); - - if (!rdev) - return; - - mutex_lock(&bnxt_re_mutex); - if (rdev->nb.notifier_call) { - unregister_netdevice_notifier(&rdev->nb); - rdev->nb.notifier_call = NULL; - } else { - /* If notifier is null, we should have already done a - * clean up before coming here. - */ - goto skip_remove; - } bnxt_re_setup_cc(rdev, false); ib_unregister_device(&rdev->ibdev); - bnxt_re_dev_uninit(rdev); + bnxt_re_dev_uninit(rdev, op_type); ib_dealloc_device(&rdev->ibdev); -skip_remove: +} + +static void bnxt_re_remove(struct auxiliary_device *adev) +{ + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + struct bnxt_re_dev *rdev; + + mutex_lock(&bnxt_re_mutex); + rdev = en_info->rdev; + + if (rdev) + bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev); + kfree(en_info); mutex_unlock(&bnxt_re_mutex); } static int bnxt_re_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { - struct bnxt_re_dev *rdev; + struct bnxt_aux_priv *aux_priv = + container_of(adev, struct bnxt_aux_priv, aux_dev); + struct bnxt_re_en_dev_info *en_info; + struct bnxt_en_dev *en_dev; int rc; + en_dev = aux_priv->edev; + mutex_lock(&bnxt_re_mutex); - rc = bnxt_re_add_device(adev, BNXT_QPLIB_WQE_MODE_STATIC); - if (rc) { + en_info = kzalloc(sizeof(*en_info), GFP_KERNEL); + if (!en_info) { mutex_unlock(&bnxt_re_mutex); - return rc; + return -ENOMEM; } + en_info->en_dev = en_dev; - rdev = auxiliary_get_drvdata(adev); + auxiliary_set_drvdata(adev, en_info); - rdev->nb.notifier_call = bnxt_re_netdev_event; - rc = register_netdevice_notifier(&rdev->nb); - if (rc) { - rdev->nb.notifier_call = NULL; - pr_err("%s: Cannot register to netdevice_notifier", - ROCE_DRV_MODULE_NAME); - goto err; - } - - bnxt_re_setup_cc(rdev, true); - mutex_unlock(&bnxt_re_mutex); - return 0; + rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT); + if (rc) + kfree(en_info); -err: mutex_unlock(&bnxt_re_mutex); - bnxt_re_remove(adev); return rc; } static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) { - struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev); - - if (!rdev) - return 0; + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + struct bnxt_en_dev *en_dev; + struct bnxt_re_dev *rdev; + rdev = en_info->rdev; + en_dev = en_info->en_dev; mutex_lock(&bnxt_re_mutex); - /* L2 driver may invoke this callback during device error/crash or device - * reset. Current RoCE driver doesn't recover the device in case of - * error. Handle the error by dispatching fatal events to all qps - * ie. by calling bnxt_re_dev_stop and release the MSIx vectors as - * L2 driver want to modify the MSIx table. 
- */ ibdev_info(&rdev->ibdev, "Handle device suspend call"); /* Check the current device state from bnxt_en_dev and move the @@ -1952,17 +2418,20 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) * This prevents more commands to HW during clean-up, * in case the device is already in error. */ - if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) + if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) { set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); + set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); + wake_up_all(&rdev->rcfw.cmdq.waitq); + bnxt_re_dev_stop(rdev); + } - bnxt_re_dev_stop(rdev); - bnxt_re_stop_irq(rdev); - /* Move the device states to detached and avoid sending any more - * commands to HW - */ - set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); - set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); - wake_up_all(&rdev->rcfw.cmdq.waitq); + if (rdev->pacing.dbr_pacing) + bnxt_re_set_pacing_dev_state(rdev); + + ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx", + __func__, en_dev->en_state); + bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev); + bnxt_re_update_en_info_rdev(NULL, en_info, adev); mutex_unlock(&bnxt_re_mutex); return 0; @@ -1970,25 +2439,28 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) static int bnxt_re_resume(struct auxiliary_device *adev) { - struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev); - - if (!rdev) - return 0; + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + struct bnxt_re_dev *rdev; mutex_lock(&bnxt_re_mutex); - /* L2 driver may invoke this callback during device recovery, resume. - * reset. Current RoCE driver doesn't recover the device in case of - * error. Handle the error by dispatching fatal events to all qps - * ie. by calling bnxt_re_dev_stop and release the MSIx vectors as - * L2 driver want to modify the MSIx table. - */ - - ibdev_info(&rdev->ibdev, "Handle device resume call"); + bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT); + rdev = en_info->rdev; + ibdev_info(&rdev->ibdev, "Device resume completed"); mutex_unlock(&bnxt_re_mutex); return 0; } +static void bnxt_re_shutdown(struct auxiliary_device *adev) +{ + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + struct bnxt_re_dev *rdev; + + rdev = en_info->rdev; + ib_unregister_device(&rdev->ibdev); + bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); +} + static const struct auxiliary_device_id bnxt_re_id_table[] = { { .name = BNXT_ADEV_NAME ".rdma", }, {}, @@ -2011,18 +2483,24 @@ static int __init bnxt_re_mod_init(void) int rc; pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version); + bnxt_re_register_debugfs(); + rc = auxiliary_driver_register(&bnxt_re_driver); if (rc) { pr_err("%s: Failed to register auxiliary driver\n", ROCE_DRV_MODULE_NAME); - return rc; + goto err_debug; } return 0; +err_debug: + bnxt_re_unregister_debugfs(); + return rc; } static void __exit bnxt_re_mod_exit(void) { auxiliary_driver_unregister(&bnxt_re_driver); + bnxt_re_unregister_debugfs(); } module_init(bnxt_re_mod_init); |