Diffstat (limited to 'drivers/scsi/lpfc/lpfc_init.c')
-rw-r--r-- | drivers/scsi/lpfc/lpfc_init.c | 1335 |
1 file changed, 784 insertions(+), 551 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index eaaef682de25..e8813d26e594 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -39,6 +39,7 @@ #include <linux/msi.h> #include <linux/irq.h> #include <linux/bitops.h> +#include <linux/crash_dump.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -65,14 +66,8 @@ #include "lpfc_version.h" #include "lpfc_ids.h" -char *_dump_buf_data; -unsigned long _dump_buf_data_order; -char *_dump_buf_dif; -unsigned long _dump_buf_dif_order; -spinlock_t _dump_buf_lock; - /* Used when mapping IRQ vectors in a driver centric manner */ -uint32_t lpfc_present_cpu; +static uint32_t lpfc_present_cpu; static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *); static int lpfc_post_rcv_buf(struct lpfc_hba *); @@ -93,8 +88,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *); static void lpfc_sli4_disable_intr(struct lpfc_hba *); static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t); static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); -static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t); static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int); +static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; @@ -1081,8 +1076,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { qp = &phba->sli4_hba.hdwq[idx]; - spin_lock(&qp->abts_scsi_buf_list_lock); - list_splice_init(&qp->lpfc_abts_scsi_buf_list, + spin_lock(&qp->abts_io_buf_list_lock); + list_splice_init(&qp->lpfc_abts_io_buf_list, &aborts); list_for_each_entry_safe(psb, psb_next, &aborts, list) { @@ -1093,29 +1088,11 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) spin_lock(&qp->io_buf_list_put_lock); list_splice_init(&aborts, &qp->lpfc_io_buf_list_put); qp->put_io_bufs += qp->abts_scsi_io_bufs; + qp->put_io_bufs += qp->abts_nvme_io_bufs; qp->abts_scsi_io_bufs = 0; + qp->abts_nvme_io_bufs = 0; spin_unlock(&qp->io_buf_list_put_lock); - spin_unlock(&qp->abts_scsi_buf_list_lock); - - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - spin_lock(&qp->abts_nvme_buf_list_lock); - list_splice_init(&qp->lpfc_abts_nvme_buf_list, - &nvme_aborts); - list_for_each_entry_safe(psb, psb_next, &nvme_aborts, - list) { - psb->pCmd = NULL; - psb->status = IOSTAT_SUCCESS; - cnt++; - } - spin_lock(&qp->io_buf_list_put_lock); - qp->put_io_bufs += qp->abts_nvme_io_bufs; - qp->abts_nvme_io_bufs = 0; - list_splice_init(&nvme_aborts, - &qp->lpfc_io_buf_list_put); - spin_unlock(&qp->io_buf_list_put_lock); - spin_unlock(&qp->abts_nvme_buf_list_lock); - - } + spin_unlock(&qp->abts_io_buf_list_lock); } spin_unlock_irq(&phba->hbalock); @@ -1261,6 +1238,7 @@ lpfc_hb_eq_delay_work(struct work_struct *work) unsigned char *eqcnt = NULL; uint32_t usdelay; int i; + bool update = false; if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING) return; @@ -1274,18 +1252,29 @@ lpfc_hb_eq_delay_work(struct work_struct *work) if (!eqcnt) goto requeue; - for (i = 0; i < phba->cfg_irq_chann; i++) { - eq = phba->sli4_hba.hdwq[i].hba_eq; - if (eq && eqcnt[eq->last_cpu] < 2) - eqcnt[eq->last_cpu]++; - continue; - } + if (phba->cfg_irq_chann > 1) { + /* Loop thru all IRQ vectors */ + for (i = 0; i < phba->cfg_irq_chann; i++) { + /* Get the EQ corresponding to the IRQ vector */ + eq = 
phba->sli4_hba.hba_eq_hdl[i].eq; + if (!eq) + continue; + if (eq->q_mode) { + update = true; + break; + } + if (eqcnt[eq->last_cpu] < 2) + eqcnt[eq->last_cpu]++; + } + } else + update = true; for_each_present_cpu(i) { - if (phba->cfg_irq_chann > 1 && eqcnt[i] < 2) - continue; - eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i); + if (!update && eqcnt[i] < 2) { + eqi->icnt = 0; + continue; + } usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) * LPFC_EQ_DELAY_STEP; @@ -1533,6 +1522,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba) spin_unlock_irq(&phba->hbalock); lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT); + lpfc_sli_flush_io_rings(phba); lpfc_offline(phba); lpfc_hba_down_post(phba); lpfc_unblock_mgmt_io(phba); @@ -1794,6 +1784,7 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action, "2887 Reset Needed: Attempting Port " "Recovery...\n"); lpfc_offline_prep(phba, mbx_action); + lpfc_sli_flush_io_rings(phba); lpfc_offline(phba); /* release interrupt for possible resource change */ lpfc_sli4_disable_intr(phba); @@ -1913,7 +1904,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "7624 Firmware not ready: Failing UE recovery," " waited %dSec", i); - lpfc_sli4_offline_eratt(phba); + phba->link_state = LPFC_HBA_ERROR; break; case LPFC_SLI_INTF_IF_TYPE_2: @@ -1987,9 +1978,8 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) } /* fall through for not able to recover */ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3152 Unrecoverable error, bring the port " - "offline\n"); - lpfc_sli4_offline_eratt(phba); + "3152 Unrecoverable error\n"); + phba->link_state = LPFC_HBA_ERROR; break; case LPFC_SLI_INTF_IF_TYPE_1: default: @@ -2861,7 +2851,7 @@ lpfc_cleanup(struct lpfc_vport *vport) &vport->fc_nodes, nlp_listp) { lpfc_printf_vlog(ndlp->vport, KERN_ERR, LOG_NODE, - "0282 did:x%x ndlp:x%p " + "0282 did:x%x ndlp:x%px " "usgmap:x%x refcnt:%d\n", ndlp->nlp_DID, (void *)ndlp, ndlp->nlp_usg_map, @@ -2963,7 +2953,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba) del_timer_sync(&phba->fcp_poll_timer); break; case LPFC_PCI_DEV_OC: - /* Stop any OneConnect device sepcific driver timers */ + /* Stop any OneConnect device specific driver timers */ lpfc_sli4_stop_fcf_redisc_wait_timer(phba); break; default: @@ -3065,7 +3055,7 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) ndlp->nlp_rpi = rpi; lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, "0009 rpi:%x DID:%x " - "flg:%x map:%x %p\n", ndlp->nlp_rpi, + "flg:%x map:%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_usg_map, ndlp); } @@ -3250,12 +3240,8 @@ static void lpfc_destroy_multixri_pools(struct lpfc_hba *phba) if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) lpfc_destroy_expedite_pool(phba); - if (!(phba->pport->load_flag & FC_UNLOADING)) { - lpfc_sli_flush_fcp_rings(phba); - - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) - lpfc_sli_flush_nvme_rings(phba); - } + if (!(phba->pport->load_flag & FC_UNLOADING)) + lpfc_sli_flush_io_rings(phba); hwq_count = phba->cfg_hdw_queue; @@ -3489,7 +3475,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, "0011 lpfc_offline: " - "ndlp:x%p did %x " + "ndlp:x%px did %x " "usgmap:x%x rpi:%x\n", ndlp, ndlp->nlp_DID, ndlp->nlp_usg_map, @@ -3634,6 +3620,9 @@ lpfc_io_free(struct lpfc_hba *phba) qp->put_io_bufs--; dma_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data, lpfc_ncmd->dma_handle); + if (phba->cfg_xpsgl && !phba->nvmet_support) + lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd); + 
lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd); kfree(lpfc_ncmd); qp->total_io_bufs--; } @@ -3647,6 +3636,9 @@ lpfc_io_free(struct lpfc_hba *phba) qp->get_io_bufs--; dma_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data, lpfc_ncmd->dma_handle); + if (phba->cfg_xpsgl && !phba->nvmet_support) + lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd); + lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd); kfree(lpfc_ncmd); qp->total_io_bufs--; } @@ -4095,18 +4087,9 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc) LIST_HEAD(post_nblist); LIST_HEAD(nvme_nblist); - /* Sanity check to ensure our sizing is right for both SCSI and NVME */ - if (sizeof(struct lpfc_io_buf) > LPFC_COMMON_IO_BUF_SZ) { - lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "6426 Common buffer size %zd exceeds %d\n", - sizeof(struct lpfc_io_buf), - LPFC_COMMON_IO_BUF_SZ); - return 0; - } - phba->sli4_hba.io_xri_cnt = 0; for (bcnt = 0; bcnt < num_to_alloc; bcnt++) { - lpfc_ncmd = kzalloc(LPFC_COMMON_IO_BUF_SZ, GFP_KERNEL); + lpfc_ncmd = kzalloc(sizeof(*lpfc_ncmd), GFP_KERNEL); if (!lpfc_ncmd) break; /* @@ -4114,31 +4097,38 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc) * pci bus space for an I/O. The DMA buffer includes the * number of SGE's necessary to support the sg_tablesize. */ - lpfc_ncmd->data = dma_pool_alloc(phba->lpfc_sg_dma_buf_pool, - GFP_KERNEL, - &lpfc_ncmd->dma_handle); + lpfc_ncmd->data = dma_pool_zalloc(phba->lpfc_sg_dma_buf_pool, + GFP_KERNEL, + &lpfc_ncmd->dma_handle); if (!lpfc_ncmd->data) { kfree(lpfc_ncmd); break; } - memset(lpfc_ncmd->data, 0, phba->cfg_sg_dma_buf_size); - /* - * 4K Page alignment is CRITICAL to BlockGuard, double check - * to be sure. - */ - if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) && - (((unsigned long)(lpfc_ncmd->data) & - (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) { - lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "3369 Memory alignment err: addr=%lx\n", - (unsigned long)lpfc_ncmd->data); - dma_pool_free(phba->lpfc_sg_dma_buf_pool, - lpfc_ncmd->data, lpfc_ncmd->dma_handle); - kfree(lpfc_ncmd); - break; + if (phba->cfg_xpsgl && !phba->nvmet_support) { + INIT_LIST_HEAD(&lpfc_ncmd->dma_sgl_xtra_list); + } else { + /* + * 4K Page alignment is CRITICAL to BlockGuard, double + * check to be sure. 
+ */ + if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) && + (((unsigned long)(lpfc_ncmd->data) & + (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) { + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, + "3369 Memory alignment err: " + "addr=%lx\n", + (unsigned long)lpfc_ncmd->data); + dma_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, + lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + break; + } } + INIT_LIST_HEAD(&lpfc_ncmd->dma_cmd_rsp_list); + lxri = lpfc_sli4_next_xritag(phba); if (lxri == NO_XRI) { dma_pool_free(phba->lpfc_sg_dma_buf_pool, @@ -4308,14 +4298,20 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) shost->max_cmd_len = 16; if (phba->sli_rev == LPFC_SLI_REV4) { - if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) - shost->nr_hw_queues = phba->cfg_hdw_queue; - else - shost->nr_hw_queues = phba->sli4_hba.num_present_cpu; + if (!phba->cfg_fcp_mq_threshold || + phba->cfg_fcp_mq_threshold > phba->cfg_hdw_queue) + phba->cfg_fcp_mq_threshold = phba->cfg_hdw_queue; + + shost->nr_hw_queues = min_t(int, 2 * num_possible_nodes(), + phba->cfg_fcp_mq_threshold); shost->dma_boundary = phba->sli4_hba.pc_sli4_params.sge_supp_len-1; - shost->sg_tablesize = phba->cfg_scsi_seg_cnt; + + if (phba->cfg_xpsgl && !phba->nvmet_support) + shost->sg_tablesize = LPFC_MAX_SG_TABLESIZE; + else + shost->sg_tablesize = phba->cfg_scsi_seg_cnt; } else /* SLI-3 has a limited number of hardware queues (3), * thus there is only one for FCP processing. @@ -4347,6 +4343,9 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) timer_setup(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo, 0); + if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) + lpfc_setup_bg(phba, shost); + error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); if (error) goto out_put_shost; @@ -5055,7 +5054,7 @@ lpfc_update_trunk_link_status(struct lpfc_hba *phba, bf_get(lpfc_acqe_fc_la_speed, acqe_fc)); phba->sli4_hba.link_state.logical_speed = - bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc); + bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc) * 10; /* We got FC link speed, convert to fc_linkspeed (READ_TOPOLOGY) */ phba->fc_linkspeed = lpfc_async_link_speed_to_read_top( @@ -5158,8 +5157,14 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) bf_get(lpfc_acqe_fc_la_port_number, acqe_fc); phba->sli4_hba.link_state.fault = bf_get(lpfc_acqe_link_fault, acqe_fc); - phba->sli4_hba.link_state.logical_speed = + + if (bf_get(lpfc_acqe_fc_la_att_type, acqe_fc) == + LPFC_FC_LA_TYPE_LINK_DOWN) + phba->sli4_hba.link_state.logical_speed = 0; + else if (!phba->sli4_hba.conf_trunk) + phba->sli4_hba.link_state.logical_speed = bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc) * 10; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "2896 Async FC event - Speed:%dGBaud Topology:x%x " "LA Type:x%x Port Type:%d Port Number:%d Logical speed:" @@ -6324,6 +6329,24 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ)) return -ENOMEM; + phba->lpfc_sg_dma_buf_pool = + dma_pool_create("lpfc_sg_dma_buf_pool", + &phba->pcidev->dev, phba->cfg_sg_dma_buf_size, + BPL_ALIGN_SZ, 0); + + if (!phba->lpfc_sg_dma_buf_pool) + goto fail_free_mem; + + phba->lpfc_cmd_rsp_buf_pool = + dma_pool_create("lpfc_cmd_rsp_buf_pool", + &phba->pcidev->dev, + sizeof(struct fcp_cmnd) + + sizeof(struct fcp_rsp), + BPL_ALIGN_SZ, 0); + + if (!phba->lpfc_cmd_rsp_buf_pool) + goto fail_free_dma_buf_pool; + /* * Enable sr-iov virtual functions if supported and configured * through the module parameter. 
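The two dma_pool_create() calls added above pair with the fail_free_* unwind labels added in the next hunk. As a rough standalone sketch of the same allocate-then-unwind pattern (the example_hba struct, pool names, element sizes, and 4 KB alignment here are illustrative stand-ins, not the driver's actual values):

#include <linux/dmapool.h>
#include <linux/errno.h>
#include <linux/pci.h>

/* Hypothetical container; stands in for the struct lpfc_hba fields used. */
struct example_hba {
	struct pci_dev *pcidev;
	struct dma_pool *sg_pool;	/* like lpfc_sg_dma_buf_pool */
	struct dma_pool *cmd_rsp_pool;	/* like lpfc_cmd_rsp_buf_pool */
};

static int example_create_pools(struct example_hba *hba, size_t sg_buf_size)
{
	/* Pool for per-I/O scatter-gather DMA buffers, 4 KB aligned */
	hba->sg_pool = dma_pool_create("example_sg_pool", &hba->pcidev->dev,
				       sg_buf_size, 4096, 0);
	if (!hba->sg_pool)
		return -ENOMEM;

	/* Small pool for command/response pairs; 64 bytes is illustrative */
	hba->cmd_rsp_pool = dma_pool_create("example_cmd_rsp_pool",
					    &hba->pcidev->dev, 64, 4096, 0);
	if (!hba->cmd_rsp_pool)
		goto fail_free_sg;

	return 0;

fail_free_sg:
	/* Unwind in reverse order of allocation, as the fail_free_* labels do */
	dma_pool_destroy(hba->sg_pool);
	hba->sg_pool = NULL;
	return -ENOMEM;
}

Keeping the labels in reverse allocation order means each failure point falls through exactly the cleanups for what was already allocated.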
@@ -6342,6 +6365,13 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) } return 0; + +fail_free_dma_buf_pool: + dma_pool_destroy(phba->lpfc_sg_dma_buf_pool); + phba->lpfc_sg_dma_buf_pool = NULL; +fail_free_mem: + lpfc_mem_free(phba); + return -ENOMEM; } /** @@ -6402,6 +6432,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) if (rc) return -ENODEV; + /* Allocate all driver workqueues here */ + + /* The lpfc_wq workqueue for deferred irq use */ + phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM, 0); + /* * Initialize timers used by driver */ @@ -6436,102 +6471,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) * The WQ create will allocate the ring. */ - /* - * 1 for cmd, 1 for rsp, NVME adds an extra one - * for boundary conditions in its max_sgl_segment template. - */ - extra = 2; - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) - extra++; - - /* - * It doesn't matter what family our adapter is in, we are - * limited to 2 Pages, 512 SGEs, for our SGL. - * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp - */ - max_buf_size = (2 * SLI4_PAGE_SIZE); - - /* - * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size - * used to create the sg_dma_buf_pool must be calculated. - */ - if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) { - /* - * The scsi_buf for a T10-DIF I/O holds the FCP cmnd, - * the FCP rsp, and a SGE. Sice we have no control - * over how many protection segments the SCSI Layer - * will hand us (ie: there could be one for every block - * in the IO), just allocate enough SGEs to accomidate - * our max amount and we need to limit lpfc_sg_seg_cnt - * to minimize the risk of running out. - */ - phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) + - sizeof(struct fcp_rsp) + max_buf_size; - - /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */ - phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT; - - /* - * If supporting DIF, reduce the seg count for scsi to - * allow room for the DIF sges. - */ - if (phba->cfg_enable_bg && - phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF) - phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF; - else - phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt; - - } else { - /* - * The scsi_buf for a regular I/O holds the FCP cmnd, - * the FCP rsp, a SGE for each, and a SGE for up to - * cfg_sg_seg_cnt data segments. - */ - phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) + - sizeof(struct fcp_rsp) + - ((phba->cfg_sg_seg_cnt + extra) * - sizeof(struct sli4_sge)); - - /* Total SGEs for scsi_sg_list */ - phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra; - phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt; - - /* - * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only - * need to post 1 page for the SGL. - */ - } - - /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { - lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, - "6300 Reducing NVME sg segment " - "cnt to %d\n", - LPFC_MAX_NVME_SEG_CNT); - phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; - } else - phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; - } - - /* Initialize the host templates with the updated values. 
*/ - lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt; - lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt; - lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt; - - if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ) - phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ; - else - phba->cfg_sg_dma_buf_size = - SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size); - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP, - "9087 sg_seg_cnt:%d dmabuf_size:%d " - "total:%d scsi:%d nvme:%d\n", - phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size, - phba->cfg_total_seg_cnt, phba->cfg_scsi_seg_cnt, - phba->cfg_nvme_seg_cnt); - /* Initialize buffer queue management fields */ INIT_LIST_HEAD(&phba->hbqs[LPFC_ELS_HBQ].hbq_buffer_list); phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc; @@ -6540,17 +6479,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* * Initialize the SLI Layer to run with lpfc SLI4 HBAs. */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { - /* Initialize the Abort scsi buffer list used by driver */ - spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock); - INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list); - } + /* Initialize the Abort buffer list used by driver */ + spin_lock_init(&phba->sli4_hba.abts_io_buf_list_lock); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_io_buf_list); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { /* Initialize the Abort nvme buffer list used by driver */ spin_lock_init(&phba->sli4_hba.abts_nvmet_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list); + spin_lock_init(&phba->sli4_hba.t_active_list_lock); + INIT_LIST_HEAD(&phba->sli4_hba.t_active_ctx_list); } /* This abort list used by worker thread */ @@ -6750,6 +6689,131 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) } } + /* + * 1 for cmd, 1 for rsp, NVME adds an extra one + * for boundary conditions in its max_sgl_segment template. + */ + extra = 2; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + extra++; + + /* + * It doesn't matter what family our adapter is in, we are + * limited to 2 Pages, 512 SGEs, for our SGL. + * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp + */ + max_buf_size = (2 * SLI4_PAGE_SIZE); + + /* + * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size + * used to create the sg_dma_buf_pool must be calculated. + */ + if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) { + /* Both cfg_enable_bg and cfg_external_dif code paths */ + + /* + * The scsi_buf for a T10-DIF I/O holds the FCP cmnd, + * the FCP rsp, and a SGE. Since we have no control + * over how many protection segments the SCSI Layer + * will hand us (ie: there could be one for every block + * in the IO), just allocate enough SGEs to accommodate + * our max amount and we need to limit lpfc_sg_seg_cnt + * to minimize the risk of running out. + */ + phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) + + sizeof(struct fcp_rsp) + max_buf_size; + + /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */ + phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT; + + /* + * If supporting DIF, reduce the seg count for scsi to + * allow room for the DIF sges.
+ */ + if (phba->cfg_enable_bg && + phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF) + phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF; + else + phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt; + + } else { + /* + * The scsi_buf for a regular I/O holds the FCP cmnd, + * the FCP rsp, a SGE for each, and a SGE for up to + * cfg_sg_seg_cnt data segments. + */ + phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) + + sizeof(struct fcp_rsp) + + ((phba->cfg_sg_seg_cnt + extra) * + sizeof(struct sli4_sge)); + + /* Total SGEs for scsi_sg_list */ + phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra; + phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt; + + /* + * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only + * need to post 1 page for the SGL. + */ + } + + if (phba->cfg_xpsgl && !phba->nvmet_support) + phba->cfg_sg_dma_buf_size = LPFC_DEFAULT_XPSGL_SIZE; + else if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ) + phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ; + else + phba->cfg_sg_dma_buf_size = + SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size); + + phba->border_sge_num = phba->cfg_sg_dma_buf_size / + sizeof(struct sli4_sge); + + /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */ + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, + "6300 Reducing NVME sg segment " + "cnt to %d\n", + LPFC_MAX_NVME_SEG_CNT); + phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + } else + phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; + } + + /* Initialize the host templates with the updated values. */ + lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt; + lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt; + lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP, + "9087 sg_seg_cnt:%d dmabuf_size:%d " + "total:%d scsi:%d nvme:%d\n", + phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size, + phba->cfg_total_seg_cnt, phba->cfg_scsi_seg_cnt, + phba->cfg_nvme_seg_cnt); + + if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE) + i = phba->cfg_sg_dma_buf_size; + else + i = SLI4_PAGE_SIZE; + + phba->lpfc_sg_dma_buf_pool = + dma_pool_create("lpfc_sg_dma_buf_pool", + &phba->pcidev->dev, + phba->cfg_sg_dma_buf_size, + i, 0); + if (!phba->lpfc_sg_dma_buf_pool) + goto out_free_bsmbx; + + phba->lpfc_cmd_rsp_buf_pool = + dma_pool_create("lpfc_cmd_rsp_buf_pool", + &phba->pcidev->dev, + sizeof(struct fcp_cmnd) + + sizeof(struct fcp_rsp), + i, 0); + if (!phba->lpfc_cmd_rsp_buf_pool) + goto out_free_sg_dma_buf; + mempool_free(mboxq, phba->mbox_mem_pool); /* Verify OAS is supported */ @@ -6761,12 +6825,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Verify all the SLI4 queues */ rc = lpfc_sli4_queue_verify(phba); if (rc) - goto out_free_bsmbx; + goto out_free_cmd_rsp_buf; /* Create driver internal CQE event pool */ rc = lpfc_sli4_cq_event_pool_create(phba); if (rc) - goto out_free_bsmbx; + goto out_free_cmd_rsp_buf; /* Initialize sgl lists per host */ lpfc_init_sgl_list(phba); @@ -6857,6 +6921,12 @@ out_free_active_sgl: lpfc_free_active_sgl(phba); out_destroy_cq_event_pool: lpfc_sli4_cq_event_pool_destroy(phba); +out_free_cmd_rsp_buf: + dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool); + phba->lpfc_cmd_rsp_buf_pool = NULL; +out_free_sg_dma_buf: + dma_pool_destroy(phba->lpfc_sg_dma_buf_pool); + phba->lpfc_sg_dma_buf_pool = NULL; out_free_bsmbx: lpfc_destroy_bootstrap_mbox(phba); out_free_mem: @@ -6983,12 +7053,6 @@ 
lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba) return error; } - /* The lpfc_wq workqueue for deferred irq use, is only used for SLI4 */ - if (phba->sli_rev == LPFC_SLI_REV4) - phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM, 0); - else - phba->wq = NULL; - return 0; } @@ -7549,7 +7613,6 @@ lpfc_setup_bg(struct lpfc_hba *phba, struct Scsi_Host *shost) uint32_t old_mask; uint32_t old_guard; - int pagecnt = 10; if (phba->cfg_prot_mask && phba->cfg_prot_guard) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "1478 Registering BlockGuard with the " @@ -7586,56 +7649,6 @@ lpfc_setup_bg(struct lpfc_hba *phba, struct Scsi_Host *shost) "layer, Bad protection parameters: %d %d\n", old_mask, old_guard); } - - if (!_dump_buf_data) { - while (pagecnt) { - spin_lock_init(&_dump_buf_lock); - _dump_buf_data = - (char *) __get_free_pages(GFP_KERNEL, pagecnt); - if (_dump_buf_data) { - lpfc_printf_log(phba, KERN_ERR, LOG_BG, - "9043 BLKGRD: allocated %d pages for " - "_dump_buf_data at 0x%p\n", - (1 << pagecnt), _dump_buf_data); - _dump_buf_data_order = pagecnt; - memset(_dump_buf_data, 0, - ((1 << PAGE_SHIFT) << pagecnt)); - break; - } else - --pagecnt; - } - if (!_dump_buf_data_order) - lpfc_printf_log(phba, KERN_ERR, LOG_BG, - "9044 BLKGRD: ERROR unable to allocate " - "memory for hexdump\n"); - } else - lpfc_printf_log(phba, KERN_ERR, LOG_BG, - "9045 BLKGRD: already allocated _dump_buf_data=0x%p" - "\n", _dump_buf_data); - if (!_dump_buf_dif) { - while (pagecnt) { - _dump_buf_dif = - (char *) __get_free_pages(GFP_KERNEL, pagecnt); - if (_dump_buf_dif) { - lpfc_printf_log(phba, KERN_ERR, LOG_BG, - "9046 BLKGRD: allocated %d pages for " - "_dump_buf_dif at 0x%p\n", - (1 << pagecnt), _dump_buf_dif); - _dump_buf_dif_order = pagecnt; - memset(_dump_buf_dif, 0, - ((1 << PAGE_SHIFT) << pagecnt)); - break; - } else - --pagecnt; - } - if (!_dump_buf_dif_order) - lpfc_printf_log(phba, KERN_ERR, LOG_BG, - "9047 BLKGRD: ERROR unable to allocate " - "memory for hexdump\n"); - } else - lpfc_printf_log(phba, KERN_ERR, LOG_BG, - "9048 BLKGRD: already allocated _dump_buf_dif=0x%p\n", - _dump_buf_dif); } /** @@ -7660,8 +7673,6 @@ lpfc_post_init_setup(struct lpfc_hba *phba) */ shost = pci_get_drvdata(phba->pcidev); shost->can_queue = phba->cfg_hba_queue_depth - 10; - if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) - lpfc_setup_bg(phba, shost); lpfc_host_attrib_init(shost); @@ -8297,6 +8308,10 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) bf_get(lpfc_mbx_rd_conf_extnts_inuse, rd_config); phba->sli4_hba.max_cfg_param.max_xri = bf_get(lpfc_mbx_rd_conf_xri_count, rd_config); + /* Reduce resource usage in kdump environment */ + if (is_kdump_kernel() && + phba->sli4_hba.max_cfg_param.max_xri > 512) + phba->sli4_hba.max_cfg_param.max_xri = 512; phba->sli4_hba.max_cfg_param.xri_base = bf_get(lpfc_mbx_rd_conf_xri_base, rd_config); phba->sli4_hba.max_cfg_param.max_vpi = @@ -8370,11 +8385,6 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) */ qmin -= 4; - /* If NVME is configured, double the number of CQ/WQs needed */ - if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) && - !phba->nvmet_support) - qmin /= 2; - /* Check to see if there is enough for NVME */ if ((phba->cfg_irq_chann > qmin) || (phba->cfg_hdw_queue > qmin)) { @@ -8631,51 +8641,14 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) } static int -lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx) +lpfc_alloc_io_wq_cq(struct lpfc_hba *phba, int idx) { struct lpfc_queue *qdesc; + u32 wqesize; int cpu; - cpu = lpfc_find_cpu_handle(phba, wqidx, 
LPFC_FIND_BY_HDWQ); - qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE, - phba->sli4_hba.cq_esize, - LPFC_CQE_EXP_COUNT, cpu); - if (!qdesc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0508 Failed allocate fast-path NVME CQ (%d)\n", - wqidx); - return 1; - } - qdesc->qe_valid = 1; - qdesc->hdwq = wqidx; - qdesc->chann = cpu; - phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc; - - qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE, - LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT, - cpu); - if (!qdesc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0509 Failed allocate fast-path NVME WQ (%d)\n", - wqidx); - return 1; - } - qdesc->hdwq = wqidx; - qdesc->chann = wqidx; - phba->sli4_hba.hdwq[wqidx].nvme_wq = qdesc; - list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); - return 0; -} - -static int -lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) -{ - struct lpfc_queue *qdesc; - uint32_t wqesize; - int cpu; - - cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ); - /* Create Fast Path FCP CQs */ + cpu = lpfc_find_cpu_handle(phba, idx, LPFC_FIND_BY_HDWQ); + /* Create Fast Path IO CQs */ if (phba->enab_exp_wqcq_pages) /* Increase the CQ size when WQEs contain an embedded cdb */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE, @@ -8688,15 +8661,15 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) phba->sli4_hba.cq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx); + "0499 Failed allocate fast-path IO CQ (%d)\n", idx); return 1; } qdesc->qe_valid = 1; - qdesc->hdwq = wqidx; + qdesc->hdwq = idx; qdesc->chann = cpu; - phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc; + phba->sli4_hba.hdwq[idx].io_cq = qdesc; - /* Create Fast Path FCP WQs */ + /* Create Fast Path IO WQs */ if (phba->enab_exp_wqcq_pages) { /* Increase the WQ size when WQEs contain an embedded cdb */ wqesize = (phba->fcp_embed_io) ? 
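The renamed lpfc_alloc_io_wq_cq() above reflects the commit's larger theme: one combined "IO" CQ/WQ pair per hardware queue instead of separate FCP and NVME pairs. A minimal sketch of that pairing, with hypothetical example_* types and allocators standing in for struct lpfc_queue and lpfc_sli4_queue_alloc():

#include <linux/errno.h>
#include <linux/types.h>

/* Hypothetical stand-ins for struct lpfc_queue and its allocator */
struct example_queue;
struct example_hdwq {
	struct example_queue *io_cq;	/* completion queue for this hdwq */
	struct example_queue *io_wq;	/* work queue for this hdwq */
};

extern struct example_queue *example_queue_alloc(u32 esize, u32 ecount,
						 int cpu);
extern void example_queue_free(struct example_queue *q);

/* One CQ/WQ pair per hardware queue, both affinitized to the same CPU */
static int example_alloc_io_wq_cq(struct example_hdwq *qp, int cpu,
				  u32 cq_esize, u32 cq_ecount,
				  u32 wq_esize, u32 wq_ecount)
{
	qp->io_cq = example_queue_alloc(cq_esize, cq_ecount, cpu);
	if (!qp->io_cq)
		return -ENOMEM;

	qp->io_wq = example_queue_alloc(wq_esize, wq_ecount, cpu);
	if (!qp->io_wq) {
		/* Never leave a partially built pair behind */
		example_queue_free(qp->io_cq);
		qp->io_cq = NULL;
		return -ENOMEM;
	}
	return 0;
}

Failing the WQ allocation frees the just-created CQ, mirroring the error handling the hunks above apply per hardware queue.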
@@ -8711,13 +8684,13 @@ if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0503 Failed allocate fast-path FCP WQ (%d)\n", - wqidx); + "0503 Failed allocate fast-path IO WQ (%d)\n", + idx); return 1; } - qdesc->hdwq = wqidx; - qdesc->chann = wqidx; - phba->sli4_hba.hdwq[wqidx].fcp_wq = qdesc; + qdesc->hdwq = idx; + qdesc->chann = cpu; + phba->sli4_hba.hdwq[idx].io_wq = qdesc; list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); return 0; } @@ -8740,8 +8713,10 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx, eqidx, cpu; + int idx, cpu, eqcpu; struct lpfc_sli4_hdw_queue *qp; + struct lpfc_vector_map_info *cpup; + struct lpfc_vector_map_info *eqcpup; struct lpfc_eq_intr_info *eqi; /* @@ -8779,12 +8754,13 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) qp->get_io_bufs = 0; qp->put_io_bufs = 0; qp->total_io_bufs = 0; - spin_lock_init(&qp->abts_scsi_buf_list_lock); - INIT_LIST_HEAD(&qp->lpfc_abts_scsi_buf_list); + spin_lock_init(&qp->abts_io_buf_list_lock); + INIT_LIST_HEAD(&qp->lpfc_abts_io_buf_list); qp->abts_scsi_io_bufs = 0; - spin_lock_init(&qp->abts_nvme_buf_list_lock); - INIT_LIST_HEAD(&qp->lpfc_abts_nvme_buf_list); qp->abts_nvme_io_bufs = 0; + INIT_LIST_HEAD(&qp->sgl_list); + INIT_LIST_HEAD(&qp->cmd_rsp_buf_list); + spin_lock_init(&qp->hdwq_lock); } } @@ -8826,76 +8802,86 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); /* Create HBA Event Queues (EQs) */ - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { - /* determine EQ affinity */ - eqidx = lpfc_find_eq_handle(phba, idx); - cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ); - /* - * If there are more Hardware Queues than available - * EQs, multiple Hardware Queues may share a common EQ. + for_each_present_cpu(cpu) { + /* We only want to create 1 EQ per vector, even though + * multiple CPUs might be using that vector, so we only + * select the CPUs that are LPFC_CPU_FIRST_IRQ. */ - if (idx >= phba->cfg_irq_chann) { - /* Share an existing EQ */ - phba->sli4_hba.hdwq[idx].hba_eq = - phba->sli4_hba.hdwq[eqidx].hba_eq; + cpup = &phba->sli4_hba.cpu_map[cpu]; + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) continue; - } - /* Create an EQ */ + + /* Get a ptr to the Hardware Queue associated with this CPU */ + qp = &phba->sli4_hba.hdwq[cpup->hdwq]; + + /* Allocate an EQ */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.eq_esize, phba->sli4_hba.eq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0497 Failed allocate EQ (%d)\n", idx); + "0497 Failed allocate EQ (%d)\n", + cpup->hdwq); goto out_error; } qdesc->qe_valid = 1; - qdesc->hdwq = idx; - - /* Save the CPU this EQ is affinitised to */ - qdesc->chann = cpu; - phba->sli4_hba.hdwq[idx].hba_eq = qdesc; + qdesc->hdwq = cpup->hdwq; + qdesc->chann = cpu; /* First CPU this EQ is affinitized to */ qdesc->last_cpu = qdesc->chann; + + /* Save the allocated EQ in the Hardware Queue */ + qp->hba_eq = qdesc; + eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu); list_add(&qdesc->cpu_list, &eqi->list); } + /* Now we need to populate the other Hardware Queues that share * an IRQ vector, with the associated EQ ptr.
+ */ + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Check for EQ already allocated in previous loop */ + if (cpup->flag & LPFC_CPU_FIRST_IRQ) + continue; + + /* Check for multiple CPUs per hdwq */ + qp = &phba->sli4_hba.hdwq[cpup->hdwq]; + if (qp->hba_eq) + continue; + + /* We need to share an EQ for this hdwq */ + eqcpu = lpfc_find_cpu_handle(phba, cpup->eq, LPFC_FIND_BY_EQ); + eqcpup = &phba->sli4_hba.cpu_map[eqcpu]; + qp->hba_eq = phba->sli4_hba.hdwq[eqcpup->hdwq].hba_eq; + } - /* Allocate SCSI SLI4 CQ/WQs */ + /* Allocate IO Path SLI4 CQ/WQs */ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { - if (lpfc_alloc_fcp_wq_cq(phba, idx)) + if (lpfc_alloc_io_wq_cq(phba, idx)) goto out_error; } - /* Allocate NVME SLI4 CQ/WQs */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { - if (lpfc_alloc_nvme_wq_cq(phba, idx)) - goto out_error; - } - - if (phba->nvmet_support) { - for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { - cpu = lpfc_find_cpu_handle(phba, idx, - LPFC_FIND_BY_HDWQ); - qdesc = lpfc_sli4_queue_alloc( - phba, + if (phba->nvmet_support) { + for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { + cpu = lpfc_find_cpu_handle(phba, idx, + LPFC_FIND_BY_HDWQ); + qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.cq_esize, phba->sli4_hba.cq_ecount, cpu); - if (!qdesc) { - lpfc_printf_log( - phba, KERN_ERR, LOG_INIT, + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3142 Failed allocate NVME " "CQ Set (%d)\n", idx); - goto out_error; - } - qdesc->qe_valid = 1; - qdesc->hdwq = idx; - qdesc->chann = cpu; - phba->sli4_hba.nvmet_cqset[idx] = qdesc; + goto out_error; } + qdesc->qe_valid = 1; + qdesc->hdwq = idx; + qdesc->chann = cpu; + phba->sli4_hba.nvmet_cqset[idx] = qdesc; } } @@ -8926,7 +8912,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } qdesc->qe_valid = 1; - qdesc->chann = 0; + qdesc->chann = cpu; phba->sli4_hba.els_cq = qdesc; @@ -8944,7 +8930,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "0505 Failed allocate slow-path MQ\n"); goto out_error; } - qdesc->chann = 0; + qdesc->chann = cpu; phba->sli4_hba.mbx_wq = qdesc; /* @@ -8960,7 +8946,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "0504 Failed allocate slow-path ELS WQ\n"); goto out_error; } - qdesc->chann = 0; + qdesc->chann = cpu; phba->sli4_hba.els_wq = qdesc; list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); @@ -8974,7 +8960,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "6079 Failed allocate NVME LS CQ\n"); goto out_error; } - qdesc->chann = 0; + qdesc->chann = cpu; qdesc->qe_valid = 1; phba->sli4_hba.nvmels_cq = qdesc; @@ -8987,7 +8973,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "6080 Failed allocate NVME LS WQ\n"); goto out_error; } - qdesc->chann = 0; + qdesc->chann = cpu; phba->sli4_hba.nvmels_wq = qdesc; list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); } @@ -9069,7 +9055,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) } } -#if defined(BUILD_NVME) /* Clear NVME stats */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { @@ -9077,7 +9062,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat)); } } -#endif /* Clear SCSI stats */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { @@ -9122,22 +9106,28 @@ static inline void lpfc_sli4_release_hdwq(struct lpfc_hba *phba) { struct lpfc_sli4_hdw_queue *hdwq; + struct lpfc_queue *eq; uint32_t idx; hdwq = 
phba->sli4_hba.hdwq; - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { - if (idx < phba->cfg_irq_chann) - lpfc_sli4_queue_free(hdwq[idx].hba_eq); - hdwq[idx].hba_eq = NULL; - lpfc_sli4_queue_free(hdwq[idx].fcp_cq); - lpfc_sli4_queue_free(hdwq[idx].nvme_cq); - lpfc_sli4_queue_free(hdwq[idx].fcp_wq); - lpfc_sli4_queue_free(hdwq[idx].nvme_wq); - hdwq[idx].fcp_cq = NULL; - hdwq[idx].nvme_cq = NULL; - hdwq[idx].fcp_wq = NULL; - hdwq[idx].nvme_wq = NULL; + /* Loop thru all Hardware Queues */ + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + /* Free the CQ/WQ corresponding to the Hardware Queue */ + lpfc_sli4_queue_free(hdwq[idx].io_cq); + lpfc_sli4_queue_free(hdwq[idx].io_wq); + hdwq[idx].io_cq = NULL; + hdwq[idx].io_wq = NULL; + if (phba->cfg_xpsgl && !phba->nvmet_support) + lpfc_free_sgl_per_hdwq(phba, &hdwq[idx]); + lpfc_free_cmd_rsp_buf_per_hdwq(phba, &hdwq[idx]); + } + /* Loop thru all IRQ vectors */ + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + /* Free the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[idx].eq; + lpfc_sli4_queue_free(eq); + phba->sli4_hba.hba_eq_hdl[idx].eq = NULL; } } @@ -9316,21 +9306,21 @@ static void lpfc_setup_cq_lookup(struct lpfc_hba *phba) { struct lpfc_queue *eq, *childq; - struct lpfc_sli4_hdw_queue *qp; int qidx; - qp = phba->sli4_hba.hdwq; memset(phba->sli4_hba.cq_lookup, 0, (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1))); + /* Loop thru all IRQ vectors */ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { - eq = qp[qidx].hba_eq; + /* Get the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; if (!eq) continue; + /* Loop through all CQs associated with that EQ */ list_for_each_entry(childq, &eq->child_list, list) { if (childq->queue_id > phba->sli4_hba.cq_max) continue; - if ((childq->subtype == LPFC_FCP) || - (childq->subtype == LPFC_NVME)) + if (childq->subtype == LPFC_IO) phba->sli4_hba.cq_lookup[childq->queue_id] = childq; } @@ -9354,9 +9344,10 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) { uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; + struct lpfc_vector_map_info *cpup; struct lpfc_sli4_hdw_queue *qp; LPFC_MBOXQ_t *mboxq; - int qidx; + int qidx, cpu; uint32_t length, usdelay; int rc = -ENOMEM; @@ -9417,57 +9408,60 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) rc = -ENOMEM; goto out_error; } + + /* Loop thru all IRQ vectors */ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { - if (!qp[qidx].hba_eq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0522 Fast-path EQ (%d) not " - "allocated\n", qidx); - rc = -ENOMEM; - goto out_destroy; - } - rc = lpfc_eq_create(phba, qp[qidx].hba_eq, - phba->cfg_fcp_imax); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0523 Failed setup of fast-path EQ " - "(%d), rc = 0x%x\n", qidx, - (uint32_t)rc); - goto out_destroy; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2584 HBA EQ setup: queue[%d]-id=%d\n", qidx, - qp[qidx].hba_eq->queue_id); - } - - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { - rc = lpfc_create_wq_cq(phba, - qp[qidx].hba_eq, - qp[qidx].nvme_cq, - qp[qidx].nvme_wq, - &phba->sli4_hba.hdwq[qidx].nvme_cq_map, - qidx, LPFC_NVME); + /* Create HBA Event Queues (EQs) in order */ + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Look for the CPU that's using that vector with + * LPFC_CPU_FIRST_IRQ set.
+ */ + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) + continue; + if (qidx != cpup->eq) + continue; + + /* Create an EQ for that vector */ + rc = lpfc_eq_create(phba, qp[cpup->hdwq].hba_eq, + phba->cfg_fcp_imax); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "6123 Failed to setup fastpath " - "NVME WQ/CQ (%d), rc = 0x%x\n", - qidx, (uint32_t)rc); + "0523 Failed setup of fast-path" + " EQ (%d), rc = 0x%x\n", + cpup->eq, (uint32_t)rc); goto out_destroy; } + + /* Save the EQ for that vector in the hba_eq_hdl */ + phba->sli4_hba.hba_eq_hdl[cpup->eq].eq = + qp[cpup->hdwq].hba_eq; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "2584 HBA EQ setup: queue[%d]-id=%d\n", + cpup->eq, + qp[cpup->hdwq].hba_eq->queue_id); } } + /* Loop thru all Hardware Queues */ for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { + cpu = lpfc_find_cpu_handle(phba, qidx, LPFC_FIND_BY_HDWQ); + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Create the CQ/WQ corresponding to the Hardware Queue */ rc = lpfc_create_wq_cq(phba, - qp[qidx].hba_eq, - qp[qidx].fcp_cq, - qp[qidx].fcp_wq, - &phba->sli4_hba.hdwq[qidx].fcp_cq_map, - qidx, LPFC_FCP); + phba->sli4_hba.hdwq[cpup->hdwq].hba_eq, + qp[qidx].io_cq, + qp[qidx].io_wq, + &phba->sli4_hba.hdwq[qidx].io_cq_map, + qidx, + LPFC_IO); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0535 Failed to setup fastpath " - "FCP WQ/CQ (%d), rc = 0x%x\n", + "IO WQ/CQ (%d), rc = 0x%x\n", qidx, (uint32_t)rc); goto out_destroy; } @@ -9711,6 +9705,7 @@ void lpfc_sli4_queue_unset(struct lpfc_hba *phba) { struct lpfc_sli4_hdw_queue *qp; + struct lpfc_queue *eq; int qidx; /* Unset mailbox command work queue */ @@ -9762,14 +9757,18 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba) /* Unset fast-path SLI4 queues */ if (phba->sli4_hba.hdwq) { + /* Loop thru all Hardware Queues */ for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { + /* Destroy the CQ/WQ corresponding to Hardware Queue */ qp = &phba->sli4_hba.hdwq[qidx]; - lpfc_wq_destroy(phba, qp->fcp_wq); - lpfc_wq_destroy(phba, qp->nvme_wq); - lpfc_cq_destroy(phba, qp->fcp_cq); - lpfc_cq_destroy(phba, qp->nvme_cq); - if (qidx < phba->cfg_irq_chann) - lpfc_eq_destroy(phba, qp->hba_eq); + lpfc_wq_destroy(phba, qp->io_wq); + lpfc_cq_destroy(phba, qp->io_cq); + } + /* Loop thru all IRQ vectors */ + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { + /* Destroy the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; + lpfc_eq_destroy(phba, eq); } } @@ -10559,11 +10558,12 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba) } /** - * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ + * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified Queue * @phba: pointer to lpfc hba data structure. * @id: EQ vector index or Hardware Queue index * @match: LPFC_FIND_BY_EQ = match by EQ * LPFC_FIND_BY_HDWQ = match by Hardware Queue + * Return the CPU that matches the selection criteria */ static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match) @@ -10571,40 +10571,27 @@ lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match) struct lpfc_vector_map_info *cpup; int cpu; - /* Find the desired phys_id for the specified EQ */ + /* Loop through all CPUs */ for_each_present_cpu(cpu) { cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* If we are matching by EQ, there may be multiple CPUs + * using the same vector, so select the one with + * LPFC_CPU_FIRST_IRQ set.
+ */ if ((match == LPFC_FIND_BY_EQ) && + (cpup->flag & LPFC_CPU_FIRST_IRQ) && (cpup->irq != LPFC_VECTOR_MAP_EMPTY) && (cpup->eq == id)) return cpu; + + /* If matching by HDWQ, select the first CPU that matches */ if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id)) return cpu; } return 0; } -/** - * lpfc_find_eq_handle - Find the EQ that corresponds to the specified - * Hardware Queue - * @phba: pointer to lpfc hba data structure. - * @hdwq: Hardware Queue index - */ -static uint16_t -lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq) -{ - struct lpfc_vector_map_info *cpup; - int cpu; - - /* Find the desired phys_id for the specified EQ */ - for_each_present_cpu(cpu) { - cpup = &phba->sli4_hba.cpu_map[cpu]; - if (cpup->hdwq == hdwq) - return cpup->eq; - } - return 0; -} - #ifdef CONFIG_X86 /** * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded @@ -10645,24 +10632,31 @@ lpfc_find_hyper(struct lpfc_hba *phba, int cpu, static void lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) { - int i, cpu, idx; + int i, cpu, idx, next_idx, new_cpu, start_cpu, first_cpu; int max_phys_id, min_phys_id; int max_core_id, min_core_id; struct lpfc_vector_map_info *cpup; + struct lpfc_vector_map_info *new_cpup; const struct cpumask *maskp; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif /* Init cpu_map array */ - memset(phba->sli4_hba.cpu_map, 0xff, - (sizeof(struct lpfc_vector_map_info) * - phba->sli4_hba.num_possible_cpu)); + for_each_possible_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + cpup->phys_id = LPFC_VECTOR_MAP_EMPTY; + cpup->core_id = LPFC_VECTOR_MAP_EMPTY; + cpup->hdwq = LPFC_VECTOR_MAP_EMPTY; + cpup->eq = LPFC_VECTOR_MAP_EMPTY; + cpup->irq = LPFC_VECTOR_MAP_EMPTY; + cpup->flag = 0; + } max_phys_id = 0; - min_phys_id = 0xffff; + min_phys_id = LPFC_VECTOR_MAP_EMPTY; max_core_id = 0; - min_core_id = 0xffff; + min_core_id = LPFC_VECTOR_MAP_EMPTY; /* Update CPU map with physical id and core id of each CPU */ for_each_present_cpu(cpu) { @@ -10671,18 +10665,17 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) cpuinfo = &cpu_data(cpu); cpup->phys_id = cpuinfo->phys_proc_id; cpup->core_id = cpuinfo->cpu_core_id; - cpup->hyper = lpfc_find_hyper(phba, cpu, - cpup->phys_id, cpup->core_id); + if (lpfc_find_hyper(phba, cpu, cpup->phys_id, cpup->core_id)) + cpup->flag |= LPFC_CPU_MAP_HYPER; #else /* No distinction between CPUs for other platforms */ cpup->phys_id = 0; cpup->core_id = cpu; - cpup->hyper = 0; #endif lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3328 CPU physid %d coreid %d\n", - cpup->phys_id, cpup->core_id); + "3328 CPU %d physid %d coreid %d flag x%x\n", + cpu, cpup->phys_id, cpup->core_id, cpup->flag); if (cpup->phys_id > max_phys_id) max_phys_id = cpup->phys_id; @@ -10703,23 +10696,269 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) eqi->icnt = 0; } + /* This loop sets up all CPUs that are affinitized with a + * irq vector assigned to the driver. All affinitized CPUs + * will get a link to that vectors IRQ and EQ. + * + * NULL affinity mask handling: + * If irq count is greater than one, log an error message. + * If the null mask is received for the first irq, find the + * first present cpu, and assign the eq index to ensure at + * least one EQ is assigned. 
+ */ for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + /* Get a CPU mask for all CPUs affinitized to this vector */ maskp = pci_irq_get_affinity(phba->pcidev, idx); - if (!maskp) - continue; + if (!maskp) { + if (phba->cfg_irq_chann > 1) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3329 No affinity mask found " + "for vector %d (%d)\n", + idx, phba->cfg_irq_chann); + if (!idx) { + cpu = cpumask_first(cpu_present_mask); + cpup = &phba->sli4_hba.cpu_map[cpu]; + cpup->eq = idx; + cpup->irq = pci_irq_vector(phba->pcidev, idx); + cpup->flag |= LPFC_CPU_FIRST_IRQ; + } + break; + } + i = 0; + /* Loop through all CPUs associated with vector idx */ for_each_cpu_and(cpu, maskp, cpu_present_mask) { + /* Set the EQ index and IRQ for that vector */ cpup = &phba->sli4_hba.cpu_map[cpu]; cpup->eq = idx; - cpup->hdwq = idx; cpup->irq = pci_irq_vector(phba->pcidev, idx); - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + /* If this is the first CPU that's assigned to this + * vector, set LPFC_CPU_FIRST_IRQ. + */ + if (!i) + cpup->flag |= LPFC_CPU_FIRST_IRQ; + i++; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3336 Set Affinity: CPU %d " - "hdwq %d irq %d\n", - cpu, cpup->hdwq, cpup->irq); + "irq %d eq %d flag x%x\n", + cpu, cpup->irq, cpup->eq, cpup->flag); + } + } + + /* After looking at each irq vector assigned to this pcidev, it's + * possible to see that not ALL CPUs have been accounted for. + * Next we will set any unassigned (unaffinitized) cpu map + * entries to an IRQ on the same phys_id. + */ + first_cpu = cpumask_first(cpu_present_mask); + start_cpu = first_cpu; + + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Is this CPU entry unassigned */ + if (cpup->eq == LPFC_VECTOR_MAP_EMPTY) { + /* Mark CPU as IRQ not assigned by the kernel */ + cpup->flag |= LPFC_CPU_MAP_UNASSIGN; + + /* If so, find a new_cpup that's on the SAME + * phys_id as cpup. start_cpu will start where we + * left off so all unassigned entries don't get assigned + * the IRQ of the first entry. + */ + new_cpu = start_cpu; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; + if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && + (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY) && + (new_cpup->phys_id == cpup->phys_id)) + goto found_same; + new_cpu = cpumask_next( + new_cpu, cpu_present_mask); + if (new_cpu == nr_cpumask_bits) + new_cpu = first_cpu; + } + /* At this point, we leave the CPU as unassigned */ + continue; +found_same: + /* We found a matching phys_id, so copy the IRQ info */ + cpup->eq = new_cpup->eq; + cpup->irq = new_cpup->irq; + + /* Bump start_cpu to the next slot to minimize the + * chance of having multiple unassigned CPU entries + * selecting the same IRQ. + */ + start_cpu = cpumask_next(new_cpu, cpu_present_mask); + if (start_cpu == nr_cpumask_bits) + start_cpu = first_cpu; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3337 Set Affinity: CPU %d " + "irq %d from id %d same " + "phys_id (%d)\n", + cpu, cpup->irq, new_cpu, cpup->phys_id); + } + } + + /* Set any unassigned cpu map entries to an IRQ on any phys_id */ + start_cpu = first_cpu; + + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Is this entry unassigned */ + if (cpup->eq == LPFC_VECTOR_MAP_EMPTY) { + /* Mark it as IRQ not assigned by the kernel */ + cpup->flag |= LPFC_CPU_MAP_UNASSIGN; + + /* If so, find a new_cpup that's on ANY phys_id + * as the cpup.
start_cpu will start where we + * left off so all unassigned entries don't get + * assigned the IRQ of the first entry. + */ + new_cpu = start_cpu; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; + if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && + (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY)) + goto found_any; + new_cpu = cpumask_next( + new_cpu, cpu_present_mask); + if (new_cpu == nr_cpumask_bits) + new_cpu = first_cpu; + } + /* We should never leave an entry unassigned */ + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3339 Set Affinity: CPU %d " + "irq %d UNASSIGNED\n", + cpup->hdwq, cpup->irq); + continue; +found_any: + /* We found an available entry, copy the IRQ info */ + cpup->eq = new_cpup->eq; + cpup->irq = new_cpup->irq; + + /* Bump start_cpu to the next slot to minimize the + * chance of having multiple unassigned CPU entries + * selecting the same IRQ. + */ + start_cpu = cpumask_next(new_cpu, cpu_present_mask); + if (start_cpu == nr_cpumask_bits) + start_cpu = first_cpu; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3338 Set Affinity: CPU %d " + "irq %d from id %d (%d/%d)\n", + cpu, cpup->irq, new_cpu, + new_cpup->phys_id, new_cpup->core_id); + } + } + + /* Assign hdwq indices that are unique across all cpus in the map + * that are also FIRST_CPUs. + */ + idx = 0; + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Only FIRST IRQs get a hdwq index assignment. */ + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) + continue; + + /* 1 to 1, the first LPFC_CPU_FIRST_IRQ cpus to a unique hdwq */ + cpup->hdwq = idx; + idx++; + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3333 Set Affinity: CPU %d (phys %d core %d): " + "hdwq %d eq %d irq %d flg x%x\n", + cpu, cpup->phys_id, cpup->core_id, + cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); + } + /* Finally we need to associate a hdwq with each cpu_map entry. + * This will be 1 to 1 - hdwq to cpu, unless there are fewer + * hardware queues than CPUs. For that case we will just round-robin + * the available hardware queues as they get assigned to CPUs. + * The next_idx is the idx from the FIRST_CPU loop above to account + * for irq_chann < hdwq. The idx is used for round-robin assignments + * and needs to start at 0. + */ + next_idx = idx; + start_cpu = 0; + idx = 0; + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* FIRST cpus are already mapped. */ + if (cpup->flag & LPFC_CPU_FIRST_IRQ) + continue; + + /* If the cfg_irq_chann < cfg_hdw_queue, set the hdwq + * of the unassigned cpus to the next idx so that all + * hdw queues are fully utilized. + */ + if (next_idx < phba->cfg_hdw_queue) { + cpup->hdwq = next_idx; + next_idx++; + continue; + } + + /* Not a First CPU and all hdw_queues are used. Reuse a + * Hardware Queue for another CPU, so be smart about it + * and pick one that has its IRQ/EQ mapped to the same phys_id + * (CPU package) and core_id. + */ + new_cpu = start_cpu; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; + if (new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY && + new_cpup->phys_id == cpup->phys_id && + new_cpup->core_id == cpup->core_id) { + goto found_hdwq; + } + new_cpu = cpumask_next(new_cpu, cpu_present_mask); + if (new_cpu == nr_cpumask_bits) + new_cpu = first_cpu; } + + /* If we can't match both phys_id and core_id, + * settle for just a phys_id match.
+ */ + new_cpu = start_cpu; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; + if (new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY && + new_cpup->phys_id == cpup->phys_id) + goto found_hdwq; + + new_cpu = cpumask_next(new_cpu, cpu_present_mask); + if (new_cpu == nr_cpumask_bits) + new_cpu = first_cpu; + } + + /* Otherwise just round robin on cfg_hdw_queue */ + cpup->hdwq = idx % phba->cfg_hdw_queue; + idx++; + goto logit; + found_hdwq: + /* We found an available entry, copy the IRQ info */ + start_cpu = cpumask_next(new_cpu, cpu_present_mask); + if (start_cpu == nr_cpumask_bits) + start_cpu = first_cpu; + cpup->hdwq = new_cpup->hdwq; + logit: + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3335 Set Affinity: CPU %d (phys %d core %d): " + "hdwq %d eq %d irq %d flg x%x\n", + cpu, cpup->phys_id, cpup->core_id, + cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); } + + /* The cpu_map array will be used later during initialization + * when EQ / CQ / WQs are allocated and configured. + */ return; } @@ -10805,10 +11044,10 @@ vec_fail_out: * @phba: pointer to lpfc hba data structure. * * This routine is invoked to enable the MSI interrupt mode to device with - * SLI-4 interface spec. The kernel function pci_enable_msi() is called - * to enable the MSI vector. The device driver is responsible for calling - * the request_irq() to register MSI vector with a interrupt the handler, - * which is done in this function. + * SLI-4 interface spec. The kernel function pci_alloc_irq_vectors() is + * called to enable the MSI vector. The device driver is responsible for + * calling the request_irq() to register MSI vector with a interrupt the + * handler, which is done in this function. * * Return codes * 0 - successful @@ -10819,20 +11058,21 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) { int rc, index; - rc = pci_enable_msi(phba->pcidev); - if (!rc) + rc = pci_alloc_irq_vectors(phba->pcidev, 1, 1, + PCI_IRQ_MSI | PCI_IRQ_AFFINITY); + if (rc > 0) lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0487 PCI enable MSI mode success.\n"); else { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0488 PCI enable MSI mode failed (%d)\n", rc); - return rc; + return rc ? rc : -1; } rc = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler, 0, LPFC_DRIVER_NAME, phba); if (rc) { - pci_disable_msi(phba->pcidev); + pci_free_irq_vectors(phba->pcidev); lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0490 MSI request_irq failed (%d)\n", rc); return rc; @@ -10998,11 +11238,10 @@ static void lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) { struct lpfc_sli4_hdw_queue *qp; - int idx, ccnt, fcnt; + int idx, ccnt; int wait_time = 0; int io_xri_cmpl = 1; int nvmet_xri_cmpl = 1; - int fcp_xri_cmpl = 1; int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); /* Driver just aborted IOs during the hba_unset process. 
Pause @@ -11016,32 +11255,21 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) lpfc_nvme_wait_for_io_drain(phba); ccnt = 0; - fcnt = 0; for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { qp = &phba->sli4_hba.hdwq[idx]; - fcp_xri_cmpl = list_empty( - &qp->lpfc_abts_scsi_buf_list); - if (!fcp_xri_cmpl) /* if list is NOT empty */ - fcnt++; - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - io_xri_cmpl = list_empty( - &qp->lpfc_abts_nvme_buf_list); - if (!io_xri_cmpl) /* if list is NOT empty */ - ccnt++; - } + io_xri_cmpl = list_empty(&qp->lpfc_abts_io_buf_list); + if (!io_xri_cmpl) /* if list is NOT empty */ + ccnt++; } if (ccnt) io_xri_cmpl = 0; - if (fcnt) - fcp_xri_cmpl = 0; if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { nvmet_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); } - while (!fcp_xri_cmpl || !els_xri_cmpl || !io_xri_cmpl || - !nvmet_xri_cmpl) { + while (!els_xri_cmpl || !io_xri_cmpl || !nvmet_xri_cmpl) { if (wait_time > LPFC_XRI_EXCH_BUSY_WAIT_TMO) { if (!nvmet_xri_cmpl) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -11050,12 +11278,7 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) wait_time/1000); if (!io_xri_cmpl) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "6100 NVME XRI exchange busy " - "wait time: %d seconds.\n", - wait_time/1000); - if (!fcp_xri_cmpl) - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2877 FCP XRI exchange busy " + "6100 IO XRI exchange busy " "wait time: %d seconds.\n", wait_time/1000); if (!els_xri_cmpl) @@ -11071,24 +11294,15 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) } ccnt = 0; - fcnt = 0; for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { qp = &phba->sli4_hba.hdwq[idx]; - fcp_xri_cmpl = list_empty( - &qp->lpfc_abts_scsi_buf_list); - if (!fcp_xri_cmpl) /* if list is NOT empty */ - fcnt++; - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - io_xri_cmpl = list_empty( - &qp->lpfc_abts_nvme_buf_list); - if (!io_xri_cmpl) /* if list is NOT empty */ - ccnt++; - } + io_xri_cmpl = list_empty( + &qp->lpfc_abts_io_buf_list); + if (!io_xri_cmpl) /* if list is NOT empty */ + ccnt++; } if (ccnt) io_xri_cmpl = 0; - if (fcnt) - fcp_xri_cmpl = 0; if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { nvmet_xri_cmpl = list_empty( @@ -11331,24 +11545,56 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) mbx_sli4_parameters); phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters); phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters); - phba->nvme_support = (bf_get(cfg_nvme, mbx_sli4_parameters) && - bf_get(cfg_xib, mbx_sli4_parameters)); - - if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) || - !phba->nvme_support) { - phba->nvme_support = 0; - phba->nvmet_support = 0; - phba->cfg_nvmet_mrq = 0; - lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME, - "6101 Disabling NVME support: " - "Not supported by firmware: %d %d\n", - bf_get(cfg_nvme, mbx_sli4_parameters), - bf_get(cfg_xib, mbx_sli4_parameters)); - - /* If firmware doesn't support NVME, just use SCSI support */ - if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) - return -ENODEV; - phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; + + /* Check for Extended Pre-Registered SGL support */ + phba->cfg_xpsgl = bf_get(cfg_xpsgl, mbx_sli4_parameters); + + /* Check for firmware nvme support */ + rc = (bf_get(cfg_nvme, mbx_sli4_parameters) && + bf_get(cfg_xib, mbx_sli4_parameters)); + + if (rc) { + /* Save this to indicate the Firmware supports NVME */ + sli4_params->nvme = 1; + + /* Firmware NVME support, check 
driver FC4 NVME support */ + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_NVME, + "6133 Disabling NVME support: " + "FC4 type not supported: x%x\n", + phba->cfg_enable_fc4_type); + goto fcponly; + } + } else { + /* No firmware NVME support, check driver FC4 NVME support */ + sli4_params->nvme = 0; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME, + "6101 Disabling NVME support: Not " + "supported by firmware (%d %d) x%x\n", + bf_get(cfg_nvme, mbx_sli4_parameters), + bf_get(cfg_xib, mbx_sli4_parameters), + phba->cfg_enable_fc4_type); +fcponly: + phba->nvme_support = 0; + phba->nvmet_support = 0; + phba->cfg_nvmet_mrq = 0; + phba->cfg_nvme_seg_cnt = 0; + + /* If no FC4 type support, move to just SCSI support */ + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return -ENODEV; + phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; + } + } + + /* If the NVME FC4 type is enabled, scale the sg_seg_cnt to + * accommodate 512K and 1M IOs in a single nvme buf and supply + * enough NVME LS iocb buffers for larger connectivity counts. + */ + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + phba->cfg_iocb_cnt = 5; } /* Only embed PBDE for if_type 6, PBDE support requires xib be set */ @@ -11415,6 +11661,14 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) else phba->mds_diags_support = 0; + /* + * Check if the SLI port supports NSLER + */ + if (bf_get(cfg_nsler, mbx_sli4_parameters)) + phba->nsler = 1; + else + phba->nsler = 0; + return 0; } @@ -11843,7 +12097,7 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba) lpfc_scsi_dev_block(phba); /* Flush all driver's outstanding SCSI I/Os as we are to reset */ - lpfc_sli_flush_fcp_rings(phba); + lpfc_sli_flush_io_rings(phba); /* stop all timers */ lpfc_stop_hba_timers(phba); @@ -11873,7 +12127,7 @@ lpfc_sli_prep_dev_for_perm_failure(struct lpfc_hba *phba) lpfc_stop_hba_timers(phba); /* Clean up all driver's outstanding SCSI I/Os */ - lpfc_sli_flush_fcp_rings(phba); + lpfc_sli_flush_io_rings(phba); } /** @@ -12645,12 +12899,8 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba) /* Block all SCSI devices' I/Os on the host */ lpfc_scsi_dev_block(phba); - /* Flush all driver's outstanding SCSI I/Os as we are to reset */ - lpfc_sli_flush_fcp_rings(phba); - - /* Flush the outstanding NVME IOs if fc4 type enabled. */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) - lpfc_sli_flush_nvme_rings(phba); + /* Flush all driver's outstanding I/Os as we are to reset */ + lpfc_sli_flush_io_rings(phba); /* stop all timers */ lpfc_stop_hba_timers(phba); @@ -12681,12 +12931,8 @@ lpfc_sli4_prep_dev_for_perm_failure(struct lpfc_hba *phba) /* stop all timers */ lpfc_stop_hba_timers(phba); - /* Clean up all driver's outstanding SCSI I/Os */ - lpfc_sli_flush_fcp_rings(phba); - - /* Flush the outstanding NVME IOs if fc4 type enabled. 
*/ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) - lpfc_sli_flush_nvme_rings(phba); + /* Clean up all driver's outstanding I/Os */ + lpfc_sli_flush_io_rings(phba); } /** @@ -13227,19 +13473,6 @@ lpfc_exit(void) pci_unregister_driver(&lpfc_driver); fc_release_transport(lpfc_transport_template); fc_release_transport(lpfc_vport_transport_template); - if (_dump_buf_data) { - printk(KERN_ERR "9062 BLKGRD: freeing %lu pages for " - "_dump_buf_data at 0x%p\n", - (1L << _dump_buf_data_order), _dump_buf_data); - free_pages((unsigned long)_dump_buf_data, _dump_buf_data_order); - } - - if (_dump_buf_dif) { - printk(KERN_ERR "9049 BLKGRD: freeing %lu pages for " - "_dump_buf_dif at 0x%p\n", - (1L << _dump_buf_dif_order), _dump_buf_dif); - free_pages((unsigned long)_dump_buf_dif, _dump_buf_dif_order); - } idr_destroy(&lpfc_hba_index); }
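The cpu_map rework in lpfc_cpu_affinity_check() above boils down to two passes: first-IRQ CPUs receive unique hardware-queue indices, and the remaining CPUs consume any unused indices before round-robining. A condensed, standalone sketch under simplifying assumptions (contiguous CPU ids, no phys_id/core_id fallback matching, hypothetical cpu_map_ent type):

#include <stdbool.h>
#include <stdio.h>

struct cpu_map_ent {
	unsigned short hdwq;	/* hardware queue index */
	bool first_irq;		/* first CPU seen on its IRQ vector */
};

static void assign_hdwq(struct cpu_map_ent *map, int nr_cpu, int nr_hdwq)
{
	int cpu, idx = 0, next_idx;

	/* Pass 1: each vector's first CPU gets its own unique hdwq index */
	for (cpu = 0; cpu < nr_cpu; cpu++)
		if (map[cpu].first_irq)
			map[cpu].hdwq = idx++;

	/* Pass 2: remaining CPUs fill unused indices, then round-robin */
	next_idx = idx;
	idx = 0;
	for (cpu = 0; cpu < nr_cpu; cpu++) {
		if (map[cpu].first_irq)
			continue;
		if (next_idx < nr_hdwq)
			map[cpu].hdwq = next_idx++;
		else
			map[cpu].hdwq = idx++ % nr_hdwq;
	}
}

int main(void)
{
	/* 8 CPUs, 2 IRQ vectors (first CPUs 0 and 4), 4 hardware queues */
	struct cpu_map_ent map[8] = { [0] = { 0, true }, [4] = { 0, true } };
	int cpu;

	assign_hdwq(map, 8, 4);
	for (cpu = 0; cpu < 8; cpu++)
		printf("cpu %d -> hdwq %u\n", cpu, map[cpu].hdwq);
	return 0;
}

This mirrors the structure of the driver's loops (next_idx carries over from the FIRST_IRQ pass; the round-robin counter restarts at 0), while the real code additionally prefers a donor CPU on the same phys_id and core_id before falling back.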