Diffstat (limited to 'drivers/scsi/lpfc/lpfc_init.c')
-rw-r--r--   drivers/scsi/lpfc/lpfc_init.c   1335
1 files changed, 784 insertions, 551 deletions
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index eaaef682de25..e8813d26e594 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -39,6 +39,7 @@
#include <linux/msi.h>
#include <linux/irq.h>
#include <linux/bitops.h>
+#include <linux/crash_dump.h>
#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
@@ -65,14 +66,8 @@
#include "lpfc_version.h"
#include "lpfc_ids.h"
-char *_dump_buf_data;
-unsigned long _dump_buf_data_order;
-char *_dump_buf_dif;
-unsigned long _dump_buf_dif_order;
-spinlock_t _dump_buf_lock;
-
/* Used when mapping IRQ vectors in a driver centric manner */
-uint32_t lpfc_present_cpu;
+static uint32_t lpfc_present_cpu;
static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
static int lpfc_post_rcv_buf(struct lpfc_hba *);
@@ -93,8 +88,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *);
static void lpfc_sli4_disable_intr(struct lpfc_hba *);
static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
-static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t);
static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
+static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *);
static struct scsi_transport_template *lpfc_transport_template = NULL;
static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -1081,8 +1076,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
qp = &phba->sli4_hba.hdwq[idx];
- spin_lock(&qp->abts_scsi_buf_list_lock);
- list_splice_init(&qp->lpfc_abts_scsi_buf_list,
+ spin_lock(&qp->abts_io_buf_list_lock);
+ list_splice_init(&qp->lpfc_abts_io_buf_list,
&aborts);
list_for_each_entry_safe(psb, psb_next, &aborts, list) {
@@ -1093,29 +1088,11 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
spin_lock(&qp->io_buf_list_put_lock);
list_splice_init(&aborts, &qp->lpfc_io_buf_list_put);
qp->put_io_bufs += qp->abts_scsi_io_bufs;
+ qp->put_io_bufs += qp->abts_nvme_io_bufs;
qp->abts_scsi_io_bufs = 0;
+ qp->abts_nvme_io_bufs = 0;
spin_unlock(&qp->io_buf_list_put_lock);
- spin_unlock(&qp->abts_scsi_buf_list_lock);
-
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- spin_lock(&qp->abts_nvme_buf_list_lock);
- list_splice_init(&qp->lpfc_abts_nvme_buf_list,
- &nvme_aborts);
- list_for_each_entry_safe(psb, psb_next, &nvme_aborts,
- list) {
- psb->pCmd = NULL;
- psb->status = IOSTAT_SUCCESS;
- cnt++;
- }
- spin_lock(&qp->io_buf_list_put_lock);
- qp->put_io_bufs += qp->abts_nvme_io_bufs;
- qp->abts_nvme_io_bufs = 0;
- list_splice_init(&nvme_aborts,
- &qp->lpfc_io_buf_list_put);
- spin_unlock(&qp->io_buf_list_put_lock);
- spin_unlock(&qp->abts_nvme_buf_list_lock);
-
- }
+ spin_unlock(&qp->abts_io_buf_list_lock);
}
spin_unlock_irq(&phba->hbalock);
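
Note: this hunk folds the separate aborted-SCSI and aborted-NVME buffer lists into a single per-hardware-queue lpfc_abts_io_buf_list. A condensed sketch of the resulting splice pattern (the put-list lock is taken while the abort-list lock is still held, as in the hunk):

	spin_lock(&qp->abts_io_buf_list_lock);
	list_splice_init(&qp->lpfc_abts_io_buf_list, &aborts);
	/* ... clear pCmd/status on each aborted buffer ... */
	spin_lock(&qp->io_buf_list_put_lock);
	list_splice_init(&aborts, &qp->lpfc_io_buf_list_put);
	qp->put_io_bufs += qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs;
	qp->abts_scsi_io_bufs = 0;
	qp->abts_nvme_io_bufs = 0;
	spin_unlock(&qp->io_buf_list_put_lock);
	spin_unlock(&qp->abts_io_buf_list_lock);
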
@@ -1261,6 +1238,7 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
unsigned char *eqcnt = NULL;
uint32_t usdelay;
int i;
+ bool update = false;
if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING)
return;
@@ -1274,18 +1252,29 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
if (!eqcnt)
goto requeue;
- for (i = 0; i < phba->cfg_irq_chann; i++) {
- eq = phba->sli4_hba.hdwq[i].hba_eq;
- if (eq && eqcnt[eq->last_cpu] < 2)
- eqcnt[eq->last_cpu]++;
- continue;
- }
+ if (phba->cfg_irq_chann > 1) {
+ /* Loop thru all IRQ vectors */
+ for (i = 0; i < phba->cfg_irq_chann; i++) {
+ /* Get the EQ corresponding to the IRQ vector */
+ eq = phba->sli4_hba.hba_eq_hdl[i].eq;
+ if (!eq)
+ continue;
+ if (eq->q_mode) {
+ update = true;
+ break;
+ }
+ if (eqcnt[eq->last_cpu] < 2)
+ eqcnt[eq->last_cpu]++;
+ }
+ } else
+ update = true;
for_each_present_cpu(i) {
- if (phba->cfg_irq_chann > 1 && eqcnt[i] < 2)
- continue;
-
eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
+ if (!update && eqcnt[i] < 2) {
+ eqi->icnt = 0;
+ continue;
+ }
usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) *
LPFC_EQ_DELAY_STEP;
@@ -1533,6 +1522,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
spin_unlock_irq(&phba->hbalock);
lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
+ lpfc_sli_flush_io_rings(phba);
lpfc_offline(phba);
lpfc_hba_down_post(phba);
lpfc_unblock_mgmt_io(phba);
@@ -1794,6 +1784,7 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action,
"2887 Reset Needed: Attempting Port "
"Recovery...\n");
lpfc_offline_prep(phba, mbx_action);
+ lpfc_sli_flush_io_rings(phba);
lpfc_offline(phba);
/* release interrupt for possible resource change */
lpfc_sli4_disable_intr(phba);
@@ -1913,7 +1904,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"7624 Firmware not ready: Failing UE recovery,"
" waited %dSec", i);
- lpfc_sli4_offline_eratt(phba);
+ phba->link_state = LPFC_HBA_ERROR;
break;
case LPFC_SLI_INTF_IF_TYPE_2:
@@ -1987,9 +1978,8 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
}
/* fall through for not able to recover */
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "3152 Unrecoverable error, bring the port "
- "offline\n");
- lpfc_sli4_offline_eratt(phba);
+ "3152 Unrecoverable error\n");
+ phba->link_state = LPFC_HBA_ERROR;
break;
case LPFC_SLI_INTF_IF_TYPE_1:
default:
@@ -2861,7 +2851,7 @@ lpfc_cleanup(struct lpfc_vport *vport)
&vport->fc_nodes, nlp_listp) {
lpfc_printf_vlog(ndlp->vport, KERN_ERR,
LOG_NODE,
- "0282 did:x%x ndlp:x%p "
+ "0282 did:x%x ndlp:x%px "
"usgmap:x%x refcnt:%d\n",
ndlp->nlp_DID, (void *)ndlp,
ndlp->nlp_usg_map,
@@ -2963,7 +2953,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba)
del_timer_sync(&phba->fcp_poll_timer);
break;
case LPFC_PCI_DEV_OC:
- /* Stop any OneConnect device sepcific driver timers */
+ /* Stop any OneConnect device specific driver timers */
lpfc_sli4_stop_fcf_redisc_wait_timer(phba);
break;
default:
@@ -3065,7 +3055,7 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba)
ndlp->nlp_rpi = rpi;
lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
"0009 rpi:%x DID:%x "
- "flg:%x map:%x %p\n", ndlp->nlp_rpi,
+ "flg:%x map:%x x%px\n", ndlp->nlp_rpi,
ndlp->nlp_DID, ndlp->nlp_flag,
ndlp->nlp_usg_map, ndlp);
}
@@ -3250,12 +3240,8 @@ static void lpfc_destroy_multixri_pools(struct lpfc_hba *phba)
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
lpfc_destroy_expedite_pool(phba);
- if (!(phba->pport->load_flag & FC_UNLOADING)) {
- lpfc_sli_flush_fcp_rings(phba);
-
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
- lpfc_sli_flush_nvme_rings(phba);
- }
+ if (!(phba->pport->load_flag & FC_UNLOADING))
+ lpfc_sli_flush_io_rings(phba);
hwq_count = phba->cfg_hdw_queue;
@@ -3489,7 +3475,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
lpfc_printf_vlog(ndlp->vport,
KERN_INFO, LOG_NODE,
"0011 lpfc_offline: "
- "ndlp:x%p did %x "
+ "ndlp:x%px did %x "
"usgmap:x%x rpi:%x\n",
ndlp, ndlp->nlp_DID,
ndlp->nlp_usg_map,
@@ -3634,6 +3620,9 @@ lpfc_io_free(struct lpfc_hba *phba)
qp->put_io_bufs--;
dma_pool_free(phba->lpfc_sg_dma_buf_pool,
lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+ if (phba->cfg_xpsgl && !phba->nvmet_support)
+ lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
+ lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
kfree(lpfc_ncmd);
qp->total_io_bufs--;
}
@@ -3647,6 +3636,9 @@ lpfc_io_free(struct lpfc_hba *phba)
qp->get_io_bufs--;
dma_pool_free(phba->lpfc_sg_dma_buf_pool,
lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+ if (phba->cfg_xpsgl && !phba->nvmet_support)
+ lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
+ lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
kfree(lpfc_ncmd);
qp->total_io_bufs--;
}
@@ -4095,18 +4087,9 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc)
LIST_HEAD(post_nblist);
LIST_HEAD(nvme_nblist);
- /* Sanity check to ensure our sizing is right for both SCSI and NVME */
- if (sizeof(struct lpfc_io_buf) > LPFC_COMMON_IO_BUF_SZ) {
- lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
- "6426 Common buffer size %zd exceeds %d\n",
- sizeof(struct lpfc_io_buf),
- LPFC_COMMON_IO_BUF_SZ);
- return 0;
- }
-
phba->sli4_hba.io_xri_cnt = 0;
for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
- lpfc_ncmd = kzalloc(LPFC_COMMON_IO_BUF_SZ, GFP_KERNEL);
+ lpfc_ncmd = kzalloc(sizeof(*lpfc_ncmd), GFP_KERNEL);
if (!lpfc_ncmd)
break;
/*
@@ -4114,31 +4097,38 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc)
* pci bus space for an I/O. The DMA buffer includes the
* number of SGE's necessary to support the sg_tablesize.
*/
- lpfc_ncmd->data = dma_pool_alloc(phba->lpfc_sg_dma_buf_pool,
- GFP_KERNEL,
- &lpfc_ncmd->dma_handle);
+ lpfc_ncmd->data = dma_pool_zalloc(phba->lpfc_sg_dma_buf_pool,
+ GFP_KERNEL,
+ &lpfc_ncmd->dma_handle);
if (!lpfc_ncmd->data) {
kfree(lpfc_ncmd);
break;
}
- memset(lpfc_ncmd->data, 0, phba->cfg_sg_dma_buf_size);
- /*
- * 4K Page alignment is CRITICAL to BlockGuard, double check
- * to be sure.
- */
- if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) &&
- (((unsigned long)(lpfc_ncmd->data) &
- (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) {
- lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
- "3369 Memory alignment err: addr=%lx\n",
- (unsigned long)lpfc_ncmd->data);
- dma_pool_free(phba->lpfc_sg_dma_buf_pool,
- lpfc_ncmd->data, lpfc_ncmd->dma_handle);
- kfree(lpfc_ncmd);
- break;
+ if (phba->cfg_xpsgl && !phba->nvmet_support) {
+ INIT_LIST_HEAD(&lpfc_ncmd->dma_sgl_xtra_list);
+ } else {
+ /*
+ * 4K Page alignment is CRITICAL to BlockGuard, double
+ * check to be sure.
+ */
+ if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) &&
+ (((unsigned long)(lpfc_ncmd->data) &
+ (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+ "3369 Memory alignment err: "
+ "addr=%lx\n",
+ (unsigned long)lpfc_ncmd->data);
+ dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+ lpfc_ncmd->data,
+ lpfc_ncmd->dma_handle);
+ kfree(lpfc_ncmd);
+ break;
+ }
}
+ INIT_LIST_HEAD(&lpfc_ncmd->dma_cmd_rsp_list);
+
lxri = lpfc_sli4_next_xritag(phba);
if (lxri == NO_XRI) {
dma_pool_free(phba->lpfc_sg_dma_buf_pool,
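
Note: the hunk above swaps dma_pool_alloc() plus an explicit memset() for dma_pool_zalloc(), which returns pre-zeroed memory. A minimal sketch of that allocation pattern, using placeholder names (pool, buf) rather than the driver's fields:

	void *buf;
	dma_addr_t dma_handle;

	buf = dma_pool_zalloc(pool, GFP_KERNEL, &dma_handle);	/* already zeroed */
	if (!buf)
		return -ENOMEM;
	/* ... build the SGL / command in buf ... */
	dma_pool_free(pool, buf, dma_handle);
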
@@ -4308,14 +4298,20 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
shost->max_cmd_len = 16;
if (phba->sli_rev == LPFC_SLI_REV4) {
- if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)
- shost->nr_hw_queues = phba->cfg_hdw_queue;
- else
- shost->nr_hw_queues = phba->sli4_hba.num_present_cpu;
+ if (!phba->cfg_fcp_mq_threshold ||
+ phba->cfg_fcp_mq_threshold > phba->cfg_hdw_queue)
+ phba->cfg_fcp_mq_threshold = phba->cfg_hdw_queue;
+
+ shost->nr_hw_queues = min_t(int, 2 * num_possible_nodes(),
+ phba->cfg_fcp_mq_threshold);
shost->dma_boundary =
phba->sli4_hba.pc_sli4_params.sge_supp_len-1;
- shost->sg_tablesize = phba->cfg_scsi_seg_cnt;
+
+ if (phba->cfg_xpsgl && !phba->nvmet_support)
+ shost->sg_tablesize = LPFC_MAX_SG_TABLESIZE;
+ else
+ shost->sg_tablesize = phba->cfg_scsi_seg_cnt;
} else
/* SLI-3 has a limited number of hardware queues (3),
* thus there is only one for FCP processing.
@@ -4347,6 +4343,9 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
timer_setup(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo, 0);
+ if (phba->sli3_options & LPFC_SLI3_BG_ENABLED)
+ lpfc_setup_bg(phba, shost);
+
error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
if (error)
goto out_put_shost;
@@ -5055,7 +5054,7 @@ lpfc_update_trunk_link_status(struct lpfc_hba *phba,
bf_get(lpfc_acqe_fc_la_speed, acqe_fc));
phba->sli4_hba.link_state.logical_speed =
- bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc);
+ bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc) * 10;
/* We got FC link speed, convert to fc_linkspeed (READ_TOPOLOGY) */
phba->fc_linkspeed =
lpfc_async_link_speed_to_read_top(
@@ -5158,8 +5157,14 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
bf_get(lpfc_acqe_fc_la_port_number, acqe_fc);
phba->sli4_hba.link_state.fault =
bf_get(lpfc_acqe_link_fault, acqe_fc);
- phba->sli4_hba.link_state.logical_speed =
+
+ if (bf_get(lpfc_acqe_fc_la_att_type, acqe_fc) ==
+ LPFC_FC_LA_TYPE_LINK_DOWN)
+ phba->sli4_hba.link_state.logical_speed = 0;
+ else if (!phba->sli4_hba.conf_trunk)
+ phba->sli4_hba.link_state.logical_speed =
bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc) * 10;
+
lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
"2896 Async FC event - Speed:%dGBaud Topology:x%x "
"LA Type:x%x Port Type:%d Port Number:%d Logical speed:"
@@ -6324,6 +6329,24 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ))
return -ENOMEM;
+ phba->lpfc_sg_dma_buf_pool =
+ dma_pool_create("lpfc_sg_dma_buf_pool",
+ &phba->pcidev->dev, phba->cfg_sg_dma_buf_size,
+ BPL_ALIGN_SZ, 0);
+
+ if (!phba->lpfc_sg_dma_buf_pool)
+ goto fail_free_mem;
+
+ phba->lpfc_cmd_rsp_buf_pool =
+ dma_pool_create("lpfc_cmd_rsp_buf_pool",
+ &phba->pcidev->dev,
+ sizeof(struct fcp_cmnd) +
+ sizeof(struct fcp_rsp),
+ BPL_ALIGN_SZ, 0);
+
+ if (!phba->lpfc_cmd_rsp_buf_pool)
+ goto fail_free_dma_buf_pool;
+
/*
* Enable sr-iov virtual functions if supported and configured
* through the module parameter.
@@ -6342,6 +6365,13 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
}
return 0;
+
+fail_free_dma_buf_pool:
+ dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
+ phba->lpfc_sg_dma_buf_pool = NULL;
+fail_free_mem:
+ lpfc_mem_free(phba);
+ return -ENOMEM;
}
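
Note: the two dma_pool_create() calls added above gain matching unwind labels so a failure of the second allocation releases the first. A condensed sketch of the pattern, with hypothetical pool names rather than the driver's:

	pool_a = dma_pool_create("pool_a", &pdev->dev, size_a, align, 0);
	if (!pool_a)
		goto fail;
	pool_b = dma_pool_create("pool_b", &pdev->dev, size_b, align, 0);
	if (!pool_b)
		goto fail_free_a;
	return 0;

fail_free_a:
	dma_pool_destroy(pool_a);	/* dma_pool_destroy() is NULL-safe */
	pool_a = NULL;
fail:
	return -ENOMEM;
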
/**
@@ -6402,6 +6432,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
if (rc)
return -ENODEV;
+ /* Allocate all driver workqueues here */
+
+ /* The lpfc_wq workqueue for deferred irq use */
+ phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM, 0);
+
/*
* Initialize timers used by driver
*/
@@ -6436,102 +6471,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
* The WQ create will allocate the ring.
*/
- /*
- * 1 for cmd, 1 for rsp, NVME adds an extra one
- * for boundary conditions in its max_sgl_segment template.
- */
- extra = 2;
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
- extra++;
-
- /*
- * It doesn't matter what family our adapter is in, we are
- * limited to 2 Pages, 512 SGEs, for our SGL.
- * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp
- */
- max_buf_size = (2 * SLI4_PAGE_SIZE);
-
- /*
- * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size
- * used to create the sg_dma_buf_pool must be calculated.
- */
- if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) {
- /*
- * The scsi_buf for a T10-DIF I/O holds the FCP cmnd,
- * the FCP rsp, and a SGE. Sice we have no control
- * over how many protection segments the SCSI Layer
- * will hand us (ie: there could be one for every block
- * in the IO), just allocate enough SGEs to accomidate
- * our max amount and we need to limit lpfc_sg_seg_cnt
- * to minimize the risk of running out.
- */
- phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
- sizeof(struct fcp_rsp) + max_buf_size;
-
- /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */
- phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT;
-
- /*
- * If supporting DIF, reduce the seg count for scsi to
- * allow room for the DIF sges.
- */
- if (phba->cfg_enable_bg &&
- phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF)
- phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF;
- else
- phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
-
- } else {
- /*
- * The scsi_buf for a regular I/O holds the FCP cmnd,
- * the FCP rsp, a SGE for each, and a SGE for up to
- * cfg_sg_seg_cnt data segments.
- */
- phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
- sizeof(struct fcp_rsp) +
- ((phba->cfg_sg_seg_cnt + extra) *
- sizeof(struct sli4_sge));
-
- /* Total SGEs for scsi_sg_list */
- phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra;
- phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
-
- /*
- * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only
- * need to post 1 page for the SGL.
- */
- }
-
- /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
- lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
- "6300 Reducing NVME sg segment "
- "cnt to %d\n",
- LPFC_MAX_NVME_SEG_CNT);
- phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
- } else
- phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
- }
-
- /* Initialize the host templates with the updated values. */
- lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
- lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
- lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt;
-
- if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ)
- phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
- else
- phba->cfg_sg_dma_buf_size =
- SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size);
-
- lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
- "9087 sg_seg_cnt:%d dmabuf_size:%d "
- "total:%d scsi:%d nvme:%d\n",
- phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
- phba->cfg_total_seg_cnt, phba->cfg_scsi_seg_cnt,
- phba->cfg_nvme_seg_cnt);
-
/* Initialize buffer queue management fields */
INIT_LIST_HEAD(&phba->hbqs[LPFC_ELS_HBQ].hbq_buffer_list);
phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc;
@@ -6540,17 +6479,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
/*
* Initialize the SLI Layer to run with lpfc SLI4 HBAs.
*/
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
- /* Initialize the Abort scsi buffer list used by driver */
- spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock);
- INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list);
- }
+ /* Initialize the Abort buffer list used by driver */
+ spin_lock_init(&phba->sli4_hba.abts_io_buf_list_lock);
+ INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_io_buf_list);
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
/* Initialize the Abort nvme buffer list used by driver */
spin_lock_init(&phba->sli4_hba.abts_nvmet_buf_list_lock);
INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list);
+ spin_lock_init(&phba->sli4_hba.t_active_list_lock);
+ INIT_LIST_HEAD(&phba->sli4_hba.t_active_ctx_list);
}
/* This abort list used by worker thread */
@@ -6750,6 +6689,131 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
}
}
+ /*
+ * 1 for cmd, 1 for rsp, NVME adds an extra one
+ * for boundary conditions in its max_sgl_segment template.
+ */
+ extra = 2;
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+ extra++;
+
+ /*
+ * It doesn't matter what family our adapter is in, we are
+ * limited to 2 Pages, 512 SGEs, for our SGL.
+ * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp
+ */
+ max_buf_size = (2 * SLI4_PAGE_SIZE);
+
+ /*
+ * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size
+ * used to create the sg_dma_buf_pool must be calculated.
+ */
+ if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) {
+ /* Both cfg_enable_bg and cfg_external_dif code paths */
+
+ /*
+ * The scsi_buf for a T10-DIF I/O holds the FCP cmnd,
+ * the FCP rsp, and a SGE. Sice we have no control
+ * over how many protection segments the SCSI Layer
+ * will hand us (ie: there could be one for every block
+ * in the IO), just allocate enough SGEs to accomidate
+ * our max amount and we need to limit lpfc_sg_seg_cnt
+ * to minimize the risk of running out.
+ */
+ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+ sizeof(struct fcp_rsp) + max_buf_size;
+
+ /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */
+ phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT;
+
+ /*
+ * If supporting DIF, reduce the seg count for scsi to
+ * allow room for the DIF sges.
+ */
+ if (phba->cfg_enable_bg &&
+ phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF)
+ phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF;
+ else
+ phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
+
+ } else {
+ /*
+ * The scsi_buf for a regular I/O holds the FCP cmnd,
+ * the FCP rsp, a SGE for each, and a SGE for up to
+ * cfg_sg_seg_cnt data segments.
+ */
+ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+ sizeof(struct fcp_rsp) +
+ ((phba->cfg_sg_seg_cnt + extra) *
+ sizeof(struct sli4_sge));
+
+ /* Total SGEs for scsi_sg_list */
+ phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra;
+ phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
+
+ /*
+ * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only
+ * need to post 1 page for the SGL.
+ */
+ }
+
+ if (phba->cfg_xpsgl && !phba->nvmet_support)
+ phba->cfg_sg_dma_buf_size = LPFC_DEFAULT_XPSGL_SIZE;
+ else if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ)
+ phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
+ else
+ phba->cfg_sg_dma_buf_size =
+ SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size);
+
+ phba->border_sge_num = phba->cfg_sg_dma_buf_size /
+ sizeof(struct sli4_sge);
+
+ /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+ if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
+ "6300 Reducing NVME sg segment "
+ "cnt to %d\n",
+ LPFC_MAX_NVME_SEG_CNT);
+ phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
+ } else
+ phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
+ }
+
+ /* Initialize the host templates with the updated values. */
+ lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
+ lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
+ lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
+ "9087 sg_seg_cnt:%d dmabuf_size:%d "
+ "total:%d scsi:%d nvme:%d\n",
+ phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
+ phba->cfg_total_seg_cnt, phba->cfg_scsi_seg_cnt,
+ phba->cfg_nvme_seg_cnt);
+
+ if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE)
+ i = phba->cfg_sg_dma_buf_size;
+ else
+ i = SLI4_PAGE_SIZE;
+
+ phba->lpfc_sg_dma_buf_pool =
+ dma_pool_create("lpfc_sg_dma_buf_pool",
+ &phba->pcidev->dev,
+ phba->cfg_sg_dma_buf_size,
+ i, 0);
+ if (!phba->lpfc_sg_dma_buf_pool)
+ goto out_free_bsmbx;
+
+ phba->lpfc_cmd_rsp_buf_pool =
+ dma_pool_create("lpfc_cmd_rsp_buf_pool",
+ &phba->pcidev->dev,
+ sizeof(struct fcp_cmnd) +
+ sizeof(struct fcp_rsp),
+ i, 0);
+ if (!phba->lpfc_cmd_rsp_buf_pool)
+ goto out_free_sg_dma_buf;
+
mempool_free(mboxq, phba->mbox_mem_pool);
/* Verify OAS is supported */
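
Note: in the SLI4 pool creation above, the alignment passed to dma_pool_create() is the smaller of the buffer size and one SLI4 page, so large buffers keep the 4K alignment that BlockGuard requires (and that lpfc_new_io_buf re-checks) without over-aligning small ones. A hedged restatement of that choice:

	/* align = min(cfg_sg_dma_buf_size, SLI4_PAGE_SIZE) */
	align = min_t(u32, phba->cfg_sg_dma_buf_size, SLI4_PAGE_SIZE);
	pool = dma_pool_create("lpfc_sg_dma_buf_pool", &phba->pcidev->dev,
			       phba->cfg_sg_dma_buf_size, align, 0);
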
@@ -6761,12 +6825,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
/* Verify all the SLI4 queues */
rc = lpfc_sli4_queue_verify(phba);
if (rc)
- goto out_free_bsmbx;
+ goto out_free_cmd_rsp_buf;
/* Create driver internal CQE event pool */
rc = lpfc_sli4_cq_event_pool_create(phba);
if (rc)
- goto out_free_bsmbx;
+ goto out_free_cmd_rsp_buf;
/* Initialize sgl lists per host */
lpfc_init_sgl_list(phba);
@@ -6857,6 +6921,12 @@ out_free_active_sgl:
lpfc_free_active_sgl(phba);
out_destroy_cq_event_pool:
lpfc_sli4_cq_event_pool_destroy(phba);
+out_free_cmd_rsp_buf:
+ dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool);
+ phba->lpfc_cmd_rsp_buf_pool = NULL;
+out_free_sg_dma_buf:
+ dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
+ phba->lpfc_sg_dma_buf_pool = NULL;
out_free_bsmbx:
lpfc_destroy_bootstrap_mbox(phba);
out_free_mem:
@@ -6983,12 +7053,6 @@ lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba)
return error;
}
- /* The lpfc_wq workqueue for deferred irq use, is only used for SLI4 */
- if (phba->sli_rev == LPFC_SLI_REV4)
- phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM, 0);
- else
- phba->wq = NULL;
-
return 0;
}
@@ -7549,7 +7613,6 @@ lpfc_setup_bg(struct lpfc_hba *phba, struct Scsi_Host *shost)
uint32_t old_mask;
uint32_t old_guard;
- int pagecnt = 10;
if (phba->cfg_prot_mask && phba->cfg_prot_guard) {
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"1478 Registering BlockGuard with the "
@@ -7586,56 +7649,6 @@ lpfc_setup_bg(struct lpfc_hba *phba, struct Scsi_Host *shost)
"layer, Bad protection parameters: %d %d\n",
old_mask, old_guard);
}
-
- if (!_dump_buf_data) {
- while (pagecnt) {
- spin_lock_init(&_dump_buf_lock);
- _dump_buf_data =
- (char *) __get_free_pages(GFP_KERNEL, pagecnt);
- if (_dump_buf_data) {
- lpfc_printf_log(phba, KERN_ERR, LOG_BG,
- "9043 BLKGRD: allocated %d pages for "
- "_dump_buf_data at 0x%p\n",
- (1 << pagecnt), _dump_buf_data);
- _dump_buf_data_order = pagecnt;
- memset(_dump_buf_data, 0,
- ((1 << PAGE_SHIFT) << pagecnt));
- break;
- } else
- --pagecnt;
- }
- if (!_dump_buf_data_order)
- lpfc_printf_log(phba, KERN_ERR, LOG_BG,
- "9044 BLKGRD: ERROR unable to allocate "
- "memory for hexdump\n");
- } else
- lpfc_printf_log(phba, KERN_ERR, LOG_BG,
- "9045 BLKGRD: already allocated _dump_buf_data=0x%p"
- "\n", _dump_buf_data);
- if (!_dump_buf_dif) {
- while (pagecnt) {
- _dump_buf_dif =
- (char *) __get_free_pages(GFP_KERNEL, pagecnt);
- if (_dump_buf_dif) {
- lpfc_printf_log(phba, KERN_ERR, LOG_BG,
- "9046 BLKGRD: allocated %d pages for "
- "_dump_buf_dif at 0x%p\n",
- (1 << pagecnt), _dump_buf_dif);
- _dump_buf_dif_order = pagecnt;
- memset(_dump_buf_dif, 0,
- ((1 << PAGE_SHIFT) << pagecnt));
- break;
- } else
- --pagecnt;
- }
- if (!_dump_buf_dif_order)
- lpfc_printf_log(phba, KERN_ERR, LOG_BG,
- "9047 BLKGRD: ERROR unable to allocate "
- "memory for hexdump\n");
- } else
- lpfc_printf_log(phba, KERN_ERR, LOG_BG,
- "9048 BLKGRD: already allocated _dump_buf_dif=0x%p\n",
- _dump_buf_dif);
}
/**
@@ -7660,8 +7673,6 @@ lpfc_post_init_setup(struct lpfc_hba *phba)
*/
shost = pci_get_drvdata(phba->pcidev);
shost->can_queue = phba->cfg_hba_queue_depth - 10;
- if (phba->sli3_options & LPFC_SLI3_BG_ENABLED)
- lpfc_setup_bg(phba, shost);
lpfc_host_attrib_init(shost);
@@ -8297,6 +8308,10 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
bf_get(lpfc_mbx_rd_conf_extnts_inuse, rd_config);
phba->sli4_hba.max_cfg_param.max_xri =
bf_get(lpfc_mbx_rd_conf_xri_count, rd_config);
+ /* Reduce resource usage in kdump environment */
+ if (is_kdump_kernel() &&
+ phba->sli4_hba.max_cfg_param.max_xri > 512)
+ phba->sli4_hba.max_cfg_param.max_xri = 512;
phba->sli4_hba.max_cfg_param.xri_base =
bf_get(lpfc_mbx_rd_conf_xri_base, rd_config);
phba->sli4_hba.max_cfg_param.max_vpi =
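
Note: the new <linux/crash_dump.h> include at the top of the file exists for this hunk. is_kdump_kernel() reports whether the driver is probing inside a crash-capture kernel, where memory is scarce, so the XRI count is capped. A small sketch of the same gate; the 512 limit mirrors the hunk above:

	#include <linux/crash_dump.h>

	if (is_kdump_kernel() && max_xri > 512)
		max_xri = 512;	/* keep resource usage low in the kdump kernel */
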
@@ -8370,11 +8385,6 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
*/
qmin -= 4;
- /* If NVME is configured, double the number of CQ/WQs needed */
- if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
- !phba->nvmet_support)
- qmin /= 2;
-
/* Check to see if there is enough for NVME */
if ((phba->cfg_irq_chann > qmin) ||
(phba->cfg_hdw_queue > qmin)) {
@@ -8631,51 +8641,14 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba)
}
static int
-lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
+lpfc_alloc_io_wq_cq(struct lpfc_hba *phba, int idx)
{
struct lpfc_queue *qdesc;
+ u32 wqesize;
int cpu;
- cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
- qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
- phba->sli4_hba.cq_esize,
- LPFC_CQE_EXP_COUNT, cpu);
- if (!qdesc) {
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0508 Failed allocate fast-path NVME CQ (%d)\n",
- wqidx);
- return 1;
- }
- qdesc->qe_valid = 1;
- qdesc->hdwq = wqidx;
- qdesc->chann = cpu;
- phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
-
- qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
- LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT,
- cpu);
- if (!qdesc) {
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0509 Failed allocate fast-path NVME WQ (%d)\n",
- wqidx);
- return 1;
- }
- qdesc->hdwq = wqidx;
- qdesc->chann = wqidx;
- phba->sli4_hba.hdwq[wqidx].nvme_wq = qdesc;
- list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
- return 0;
-}
-
-static int
-lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
-{
- struct lpfc_queue *qdesc;
- uint32_t wqesize;
- int cpu;
-
- cpu = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
- /* Create Fast Path FCP CQs */
+ cpu = lpfc_find_cpu_handle(phba, idx, LPFC_FIND_BY_HDWQ);
+ /* Create Fast Path IO CQs */
if (phba->enab_exp_wqcq_pages)
/* Increase the CQ size when WQEs contain an embedded cdb */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
@@ -8688,15 +8661,15 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
phba->sli4_hba.cq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx);
+ "0499 Failed allocate fast-path IO CQ (%d)\n", idx);
return 1;
}
qdesc->qe_valid = 1;
- qdesc->hdwq = wqidx;
+ qdesc->hdwq = idx;
qdesc->chann = cpu;
- phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
+ phba->sli4_hba.hdwq[idx].io_cq = qdesc;
- /* Create Fast Path FCP WQs */
+ /* Create Fast Path IO WQs */
if (phba->enab_exp_wqcq_pages) {
/* Increase the WQ size when WQEs contain an embedded cdb */
wqesize = (phba->fcp_embed_io) ?
@@ -8711,13 +8684,13 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0503 Failed allocate fast-path FCP WQ (%d)\n",
- wqidx);
+ "0503 Failed allocate fast-path IO WQ (%d)\n",
+ idx);
return 1;
}
- qdesc->hdwq = wqidx;
- qdesc->chann = wqidx;
- phba->sli4_hba.hdwq[wqidx].fcp_wq = qdesc;
+ qdesc->hdwq = idx;
+ qdesc->chann = cpu;
+ phba->sli4_hba.hdwq[idx].io_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
return 0;
}
@@ -8740,8 +8713,10 @@ int
lpfc_sli4_queue_create(struct lpfc_hba *phba)
{
struct lpfc_queue *qdesc;
- int idx, eqidx, cpu;
+ int idx, cpu, eqcpu;
struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_vector_map_info *cpup;
+ struct lpfc_vector_map_info *eqcpup;
struct lpfc_eq_intr_info *eqi;
/*
@@ -8779,12 +8754,13 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
qp->get_io_bufs = 0;
qp->put_io_bufs = 0;
qp->total_io_bufs = 0;
- spin_lock_init(&qp->abts_scsi_buf_list_lock);
- INIT_LIST_HEAD(&qp->lpfc_abts_scsi_buf_list);
+ spin_lock_init(&qp->abts_io_buf_list_lock);
+ INIT_LIST_HEAD(&qp->lpfc_abts_io_buf_list);
qp->abts_scsi_io_bufs = 0;
- spin_lock_init(&qp->abts_nvme_buf_list_lock);
- INIT_LIST_HEAD(&qp->lpfc_abts_nvme_buf_list);
qp->abts_nvme_io_bufs = 0;
+ INIT_LIST_HEAD(&qp->sgl_list);
+ INIT_LIST_HEAD(&qp->cmd_rsp_buf_list);
+ spin_lock_init(&qp->hdwq_lock);
}
}
@@ -8826,76 +8802,86 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list);
/* Create HBA Event Queues (EQs) */
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
- /* determine EQ affinity */
- eqidx = lpfc_find_eq_handle(phba, idx);
- cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ);
- /*
- * If there are more Hardware Queues than available
- * EQs, multiple Hardware Queues may share a common EQ.
+ for_each_present_cpu(cpu) {
+ /* We only want to create 1 EQ per vector, even though
+ * multiple CPUs might be using that vector. so only
+ * selects the CPUs that are LPFC_CPU_FIRST_IRQ.
*/
- if (idx >= phba->cfg_irq_chann) {
- /* Share an existing EQ */
- phba->sli4_hba.hdwq[idx].hba_eq =
- phba->sli4_hba.hdwq[eqidx].hba_eq;
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+ if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
continue;
- }
- /* Create an EQ */
+
+ /* Get a ptr to the Hardware Queue associated with this CPU */
+ qp = &phba->sli4_hba.hdwq[cpup->hdwq];
+
+ /* Allocate an EQ */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.eq_esize,
phba->sli4_hba.eq_ecount, cpu);
if (!qdesc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0497 Failed allocate EQ (%d)\n", idx);
+ "0497 Failed allocate EQ (%d)\n",
+ cpup->hdwq);
goto out_error;
}
qdesc->qe_valid = 1;
- qdesc->hdwq = idx;
-
- /* Save the CPU this EQ is affinitised to */
- qdesc->chann = cpu;
- phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
+ qdesc->hdwq = cpup->hdwq;
+ qdesc->chann = cpu; /* First CPU this EQ is affinitized to */
qdesc->last_cpu = qdesc->chann;
+
+ /* Save the allocated EQ in the Hardware Queue */
+ qp->hba_eq = qdesc;
+
eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
list_add(&qdesc->cpu_list, &eqi->list);
}
+ /* Now we need to populate the other Hardware Queues, that share
+ * an IRQ vector, with the associated EQ ptr.
+ */
+ for_each_present_cpu(cpu) {
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+
+ /* Check for EQ already allocated in previous loop */
+ if (cpup->flag & LPFC_CPU_FIRST_IRQ)
+ continue;
+
+ /* Check for multiple CPUs per hdwq */
+ qp = &phba->sli4_hba.hdwq[cpup->hdwq];
+ if (qp->hba_eq)
+ continue;
+
+ /* We need to share an EQ for this hdwq */
+ eqcpu = lpfc_find_cpu_handle(phba, cpup->eq, LPFC_FIND_BY_EQ);
+ eqcpup = &phba->sli4_hba.cpu_map[eqcpu];
+ qp->hba_eq = phba->sli4_hba.hdwq[eqcpup->hdwq].hba_eq;
+ }
- /* Allocate SCSI SLI4 CQ/WQs */
+ /* Allocate IO Path SLI4 CQ/WQs */
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
- if (lpfc_alloc_fcp_wq_cq(phba, idx))
+ if (lpfc_alloc_io_wq_cq(phba, idx))
goto out_error;
}
- /* Allocate NVME SLI4 CQ/WQs */
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
- if (lpfc_alloc_nvme_wq_cq(phba, idx))
- goto out_error;
- }
-
- if (phba->nvmet_support) {
- for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
- cpu = lpfc_find_cpu_handle(phba, idx,
- LPFC_FIND_BY_HDWQ);
- qdesc = lpfc_sli4_queue_alloc(
- phba,
+ if (phba->nvmet_support) {
+ for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+ cpu = lpfc_find_cpu_handle(phba, idx,
+ LPFC_FIND_BY_HDWQ);
+ qdesc = lpfc_sli4_queue_alloc(phba,
LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.cq_esize,
phba->sli4_hba.cq_ecount,
cpu);
- if (!qdesc) {
- lpfc_printf_log(
- phba, KERN_ERR, LOG_INIT,
+ if (!qdesc) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3142 Failed allocate NVME "
"CQ Set (%d)\n", idx);
- goto out_error;
- }
- qdesc->qe_valid = 1;
- qdesc->hdwq = idx;
- qdesc->chann = cpu;
- phba->sli4_hba.nvmet_cqset[idx] = qdesc;
+ goto out_error;
}
+ qdesc->qe_valid = 1;
+ qdesc->hdwq = idx;
+ qdesc->chann = cpu;
+ phba->sli4_hba.nvmet_cqset[idx] = qdesc;
}
}
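
Note: the rewritten allocation loop above creates exactly one EQ per IRQ vector by acting only on CPUs flagged LPFC_CPU_FIRST_IRQ, then a second pass points every remaining hardware queue at the EQ that owns its vector. A condensed two-pass sketch; alloc_eq_for() and eq_owned_by_vector() are placeholders, not driver functions:

	for_each_present_cpu(cpu) {
		cpup = &phba->sli4_hba.cpu_map[cpu];
		if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
			continue;			/* one EQ per vector */
		phba->sli4_hba.hdwq[cpup->hdwq].hba_eq = alloc_eq_for(phba, cpu);
	}
	for_each_present_cpu(cpu) {
		cpup = &phba->sli4_hba.cpu_map[cpu];
		qp = &phba->sli4_hba.hdwq[cpup->hdwq];
		if (!qp->hba_eq)			/* hdwq sharing a vector */
			qp->hba_eq = eq_owned_by_vector(phba, cpup->eq);
	}
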
@@ -8926,7 +8912,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
goto out_error;
}
qdesc->qe_valid = 1;
- qdesc->chann = 0;
+ qdesc->chann = cpu;
phba->sli4_hba.els_cq = qdesc;
@@ -8944,7 +8930,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"0505 Failed allocate slow-path MQ\n");
goto out_error;
}
- qdesc->chann = 0;
+ qdesc->chann = cpu;
phba->sli4_hba.mbx_wq = qdesc;
/*
@@ -8960,7 +8946,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"0504 Failed allocate slow-path ELS WQ\n");
goto out_error;
}
- qdesc->chann = 0;
+ qdesc->chann = cpu;
phba->sli4_hba.els_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
@@ -8974,7 +8960,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"6079 Failed allocate NVME LS CQ\n");
goto out_error;
}
- qdesc->chann = 0;
+ qdesc->chann = cpu;
qdesc->qe_valid = 1;
phba->sli4_hba.nvmels_cq = qdesc;
@@ -8987,7 +8973,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"6080 Failed allocate NVME LS WQ\n");
goto out_error;
}
- qdesc->chann = 0;
+ qdesc->chann = cpu;
phba->sli4_hba.nvmels_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
}
@@ -9069,7 +9055,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
}
}
-#if defined(BUILD_NVME)
/* Clear NVME stats */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
@@ -9077,7 +9062,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat));
}
}
-#endif
/* Clear SCSI stats */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
@@ -9122,22 +9106,28 @@ static inline void
lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
{
struct lpfc_sli4_hdw_queue *hdwq;
+ struct lpfc_queue *eq;
uint32_t idx;
hdwq = phba->sli4_hba.hdwq;
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
- if (idx < phba->cfg_irq_chann)
- lpfc_sli4_queue_free(hdwq[idx].hba_eq);
- hdwq[idx].hba_eq = NULL;
- lpfc_sli4_queue_free(hdwq[idx].fcp_cq);
- lpfc_sli4_queue_free(hdwq[idx].nvme_cq);
- lpfc_sli4_queue_free(hdwq[idx].fcp_wq);
- lpfc_sli4_queue_free(hdwq[idx].nvme_wq);
- hdwq[idx].fcp_cq = NULL;
- hdwq[idx].nvme_cq = NULL;
- hdwq[idx].fcp_wq = NULL;
- hdwq[idx].nvme_wq = NULL;
+ /* Loop thru all Hardware Queues */
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ /* Free the CQ/WQ corresponding to the Hardware Queue */
+ lpfc_sli4_queue_free(hdwq[idx].io_cq);
+ lpfc_sli4_queue_free(hdwq[idx].io_wq);
+ hdwq[idx].io_cq = NULL;
+ hdwq[idx].io_wq = NULL;
+ if (phba->cfg_xpsgl && !phba->nvmet_support)
+ lpfc_free_sgl_per_hdwq(phba, &hdwq[idx]);
+ lpfc_free_cmd_rsp_buf_per_hdwq(phba, &hdwq[idx]);
+ }
+ /* Loop thru all IRQ vectors */
+ for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
+ /* Free the EQ corresponding to the IRQ vector */
+ eq = phba->sli4_hba.hba_eq_hdl[idx].eq;
+ lpfc_sli4_queue_free(eq);
+ phba->sli4_hba.hba_eq_hdl[idx].eq = NULL;
}
}
@@ -9316,21 +9306,21 @@ static void
lpfc_setup_cq_lookup(struct lpfc_hba *phba)
{
struct lpfc_queue *eq, *childq;
- struct lpfc_sli4_hdw_queue *qp;
int qidx;
- qp = phba->sli4_hba.hdwq;
memset(phba->sli4_hba.cq_lookup, 0,
(sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1)));
+ /* Loop thru all IRQ vectors */
for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
- eq = qp[qidx].hba_eq;
+ /* Get the EQ corresponding to the IRQ vector */
+ eq = phba->sli4_hba.hba_eq_hdl[qidx].eq;
if (!eq)
continue;
+ /* Loop through all CQs associated with that EQ */
list_for_each_entry(childq, &eq->child_list, list) {
if (childq->queue_id > phba->sli4_hba.cq_max)
continue;
- if ((childq->subtype == LPFC_FCP) ||
- (childq->subtype == LPFC_NVME))
+ if (childq->subtype == LPFC_IO)
phba->sli4_hba.cq_lookup[childq->queue_id] =
childq;
}
@@ -9354,9 +9344,10 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
{
uint32_t shdr_status, shdr_add_status;
union lpfc_sli4_cfg_shdr *shdr;
+ struct lpfc_vector_map_info *cpup;
struct lpfc_sli4_hdw_queue *qp;
LPFC_MBOXQ_t *mboxq;
- int qidx;
+ int qidx, cpu;
uint32_t length, usdelay;
int rc = -ENOMEM;
@@ -9417,57 +9408,60 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
rc = -ENOMEM;
goto out_error;
}
+
+ /* Loop thru all IRQ vectors */
for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
- if (!qp[qidx].hba_eq) {
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0522 Fast-path EQ (%d) not "
- "allocated\n", qidx);
- rc = -ENOMEM;
- goto out_destroy;
- }
- rc = lpfc_eq_create(phba, qp[qidx].hba_eq,
- phba->cfg_fcp_imax);
- if (rc) {
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0523 Failed setup of fast-path EQ "
- "(%d), rc = 0x%x\n", qidx,
- (uint32_t)rc);
- goto out_destroy;
- }
- lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
- "2584 HBA EQ setup: queue[%d]-id=%d\n", qidx,
- qp[qidx].hba_eq->queue_id);
- }
+ /* Create HBA Event Queues (EQs) in order */
+ for_each_present_cpu(cpu) {
+ cpup = &phba->sli4_hba.cpu_map[cpu];
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
- rc = lpfc_create_wq_cq(phba,
- qp[qidx].hba_eq,
- qp[qidx].nvme_cq,
- qp[qidx].nvme_wq,
- &phba->sli4_hba.hdwq[qidx].nvme_cq_map,
- qidx, LPFC_NVME);
+ /* Look for the CPU thats using that vector with
+ * LPFC_CPU_FIRST_IRQ set.
+ */
+ if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
+ continue;
+ if (qidx != cpup->eq)
+ continue;
+
+ /* Create an EQ for that vector */
+ rc = lpfc_eq_create(phba, qp[cpup->hdwq].hba_eq,
+ phba->cfg_fcp_imax);
if (rc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "6123 Failed to setup fastpath "
- "NVME WQ/CQ (%d), rc = 0x%x\n",
- qidx, (uint32_t)rc);
+ "0523 Failed setup of fast-path"
+ " EQ (%d), rc = 0x%x\n",
+ cpup->eq, (uint32_t)rc);
goto out_destroy;
}
+
+ /* Save the EQ for that vector in the hba_eq_hdl */
+ phba->sli4_hba.hba_eq_hdl[cpup->eq].eq =
+ qp[cpup->hdwq].hba_eq;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "2584 HBA EQ setup: queue[%d]-id=%d\n",
+ cpup->eq,
+ qp[cpup->hdwq].hba_eq->queue_id);
}
}
+ /* Loop thru all Hardware Queues */
for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+ cpu = lpfc_find_cpu_handle(phba, qidx, LPFC_FIND_BY_HDWQ);
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+
+ /* Create the CQ/WQ corresponding to the Hardware Queue */
rc = lpfc_create_wq_cq(phba,
- qp[qidx].hba_eq,
- qp[qidx].fcp_cq,
- qp[qidx].fcp_wq,
- &phba->sli4_hba.hdwq[qidx].fcp_cq_map,
- qidx, LPFC_FCP);
+ phba->sli4_hba.hdwq[cpup->hdwq].hba_eq,
+ qp[qidx].io_cq,
+ qp[qidx].io_wq,
+ &phba->sli4_hba.hdwq[qidx].io_cq_map,
+ qidx,
+ LPFC_IO);
if (rc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0535 Failed to setup fastpath "
- "FCP WQ/CQ (%d), rc = 0x%x\n",
+ "IO WQ/CQ (%d), rc = 0x%x\n",
qidx, (uint32_t)rc);
goto out_destroy;
}
@@ -9711,6 +9705,7 @@ void
lpfc_sli4_queue_unset(struct lpfc_hba *phba)
{
struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_queue *eq;
int qidx;
/* Unset mailbox command work queue */
@@ -9762,14 +9757,18 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
/* Unset fast-path SLI4 queues */
if (phba->sli4_hba.hdwq) {
+ /* Loop thru all Hardware Queues */
for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+ /* Destroy the CQ/WQ corresponding to Hardware Queue */
qp = &phba->sli4_hba.hdwq[qidx];
- lpfc_wq_destroy(phba, qp->fcp_wq);
- lpfc_wq_destroy(phba, qp->nvme_wq);
- lpfc_cq_destroy(phba, qp->fcp_cq);
- lpfc_cq_destroy(phba, qp->nvme_cq);
- if (qidx < phba->cfg_irq_chann)
- lpfc_eq_destroy(phba, qp->hba_eq);
+ lpfc_wq_destroy(phba, qp->io_wq);
+ lpfc_cq_destroy(phba, qp->io_cq);
+ }
+ /* Loop thru all IRQ vectors */
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+ /* Destroy the EQ corresponding to the IRQ vector */
+ eq = phba->sli4_hba.hba_eq_hdl[qidx].eq;
+ lpfc_eq_destroy(phba, eq);
}
}
@@ -10559,11 +10558,12 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba)
}
/**
- * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ
+ * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified Queue
* @phba: pointer to lpfc hba data structure.
* @id: EQ vector index or Hardware Queue index
* @match: LPFC_FIND_BY_EQ = match by EQ
* LPFC_FIND_BY_HDWQ = match by Hardware Queue
+ * Return the CPU that matches the selection criteria
*/
static uint16_t
lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
@@ -10571,40 +10571,27 @@ lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
struct lpfc_vector_map_info *cpup;
int cpu;
- /* Find the desired phys_id for the specified EQ */
+ /* Loop through all CPUs */
for_each_present_cpu(cpu) {
cpup = &phba->sli4_hba.cpu_map[cpu];
+
+ /* If we are matching by EQ, there may be multiple CPUs using
+ * using the same vector, so select the one with
+ * LPFC_CPU_FIRST_IRQ set.
+ */
if ((match == LPFC_FIND_BY_EQ) &&
+ (cpup->flag & LPFC_CPU_FIRST_IRQ) &&
(cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
(cpup->eq == id))
return cpu;
+
+ /* If matching by HDWQ, select the first CPU that matches */
if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id))
return cpu;
}
return 0;
}
-/**
- * lpfc_find_eq_handle - Find the EQ that corresponds to the specified
- * Hardware Queue
- * @phba: pointer to lpfc hba data structure.
- * @hdwq: Hardware Queue index
- */
-static uint16_t
-lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq)
-{
- struct lpfc_vector_map_info *cpup;
- int cpu;
-
- /* Find the desired phys_id for the specified EQ */
- for_each_present_cpu(cpu) {
- cpup = &phba->sli4_hba.cpu_map[cpu];
- if (cpup->hdwq == hdwq)
- return cpup->eq;
- }
- return 0;
-}
-
#ifdef CONFIG_X86
/**
* lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded
@@ -10645,24 +10632,31 @@ lpfc_find_hyper(struct lpfc_hba *phba, int cpu,
static void
lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
{
- int i, cpu, idx;
+ int i, cpu, idx, next_idx, new_cpu, start_cpu, first_cpu;
int max_phys_id, min_phys_id;
int max_core_id, min_core_id;
struct lpfc_vector_map_info *cpup;
+ struct lpfc_vector_map_info *new_cpup;
const struct cpumask *maskp;
#ifdef CONFIG_X86
struct cpuinfo_x86 *cpuinfo;
#endif
/* Init cpu_map array */
- memset(phba->sli4_hba.cpu_map, 0xff,
- (sizeof(struct lpfc_vector_map_info) *
- phba->sli4_hba.num_possible_cpu));
+ for_each_possible_cpu(cpu) {
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+ cpup->phys_id = LPFC_VECTOR_MAP_EMPTY;
+ cpup->core_id = LPFC_VECTOR_MAP_EMPTY;
+ cpup->hdwq = LPFC_VECTOR_MAP_EMPTY;
+ cpup->eq = LPFC_VECTOR_MAP_EMPTY;
+ cpup->irq = LPFC_VECTOR_MAP_EMPTY;
+ cpup->flag = 0;
+ }
max_phys_id = 0;
- min_phys_id = 0xffff;
+ min_phys_id = LPFC_VECTOR_MAP_EMPTY;
max_core_id = 0;
- min_core_id = 0xffff;
+ min_core_id = LPFC_VECTOR_MAP_EMPTY;
/* Update CPU map with physical id and core id of each CPU */
for_each_present_cpu(cpu) {
@@ -10671,18 +10665,17 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
cpuinfo = &cpu_data(cpu);
cpup->phys_id = cpuinfo->phys_proc_id;
cpup->core_id = cpuinfo->cpu_core_id;
- cpup->hyper = lpfc_find_hyper(phba, cpu,
- cpup->phys_id, cpup->core_id);
+ if (lpfc_find_hyper(phba, cpu, cpup->phys_id, cpup->core_id))
+ cpup->flag |= LPFC_CPU_MAP_HYPER;
#else
/* No distinction between CPUs for other platforms */
cpup->phys_id = 0;
cpup->core_id = cpu;
- cpup->hyper = 0;
#endif
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
- "3328 CPU physid %d coreid %d\n",
- cpup->phys_id, cpup->core_id);
+ "3328 CPU %d physid %d coreid %d flag x%x\n",
+ cpu, cpup->phys_id, cpup->core_id, cpup->flag);
if (cpup->phys_id > max_phys_id)
max_phys_id = cpup->phys_id;
@@ -10703,23 +10696,269 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
eqi->icnt = 0;
}
+ /* This loop sets up all CPUs that are affinitized with a
+ * irq vector assigned to the driver. All affinitized CPUs
+ * will get a link to that vectors IRQ and EQ.
+ *
+ * NULL affinity mask handling:
+ * If irq count is greater than one, log an error message.
+ * If the null mask is received for the first irq, find the
+ * first present cpu, and assign the eq index to ensure at
+ * least one EQ is assigned.
+ */
for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
+ /* Get a CPU mask for all CPUs affinitized to this vector */
maskp = pci_irq_get_affinity(phba->pcidev, idx);
- if (!maskp)
- continue;
+ if (!maskp) {
+ if (phba->cfg_irq_chann > 1)
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3329 No affinity mask found "
+ "for vector %d (%d)\n",
+ idx, phba->cfg_irq_chann);
+ if (!idx) {
+ cpu = cpumask_first(cpu_present_mask);
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+ cpup->eq = idx;
+ cpup->irq = pci_irq_vector(phba->pcidev, idx);
+ cpup->flag |= LPFC_CPU_FIRST_IRQ;
+ }
+ break;
+ }
+ i = 0;
+ /* Loop through all CPUs associated with vector idx */
for_each_cpu_and(cpu, maskp, cpu_present_mask) {
+ /* Set the EQ index and IRQ for that vector */
cpup = &phba->sli4_hba.cpu_map[cpu];
cpup->eq = idx;
- cpup->hdwq = idx;
cpup->irq = pci_irq_vector(phba->pcidev, idx);
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ /* If this is the first CPU thats assigned to this
+ * vector, set LPFC_CPU_FIRST_IRQ.
+ */
+ if (!i)
+ cpup->flag |= LPFC_CPU_FIRST_IRQ;
+ i++;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"3336 Set Affinity: CPU %d "
- "hdwq %d irq %d\n",
- cpu, cpup->hdwq, cpup->irq);
+ "irq %d eq %d flag x%x\n",
+ cpu, cpup->irq, cpup->eq, cpup->flag);
+ }
+ }
+
+ /* After looking at each irq vector assigned to this pcidev, its
+ * possible to see that not ALL CPUs have been accounted for.
+ * Next we will set any unassigned (unaffinitized) cpu map
+ * entries to a IRQ on the same phys_id.
+ */
+ first_cpu = cpumask_first(cpu_present_mask);
+ start_cpu = first_cpu;
+
+ for_each_present_cpu(cpu) {
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+
+ /* Is this CPU entry unassigned */
+ if (cpup->eq == LPFC_VECTOR_MAP_EMPTY) {
+ /* Mark CPU as IRQ not assigned by the kernel */
+ cpup->flag |= LPFC_CPU_MAP_UNASSIGN;
+
+ /* If so, find a new_cpup thats on the the SAME
+ * phys_id as cpup. start_cpu will start where we
+ * left off so all unassigned entries don't get assgined
+ * the IRQ of the first entry.
+ */
+ new_cpu = start_cpu;
+ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+ new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+ if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) &&
+ (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+ (new_cpup->phys_id == cpup->phys_id))
+ goto found_same;
+ new_cpu = cpumask_next(
+ new_cpu, cpu_present_mask);
+ if (new_cpu == nr_cpumask_bits)
+ new_cpu = first_cpu;
+ }
+ /* At this point, we leave the CPU as unassigned */
+ continue;
+found_same:
+ /* We found a matching phys_id, so copy the IRQ info */
+ cpup->eq = new_cpup->eq;
+ cpup->irq = new_cpup->irq;
+
+ /* Bump start_cpu to the next slot to minmize the
+ * chance of having multiple unassigned CPU entries
+ * selecting the same IRQ.
+ */
+ start_cpu = cpumask_next(new_cpu, cpu_present_mask);
+ if (start_cpu == nr_cpumask_bits)
+ start_cpu = first_cpu;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3337 Set Affinity: CPU %d "
+ "irq %d from id %d same "
+ "phys_id (%d)\n",
+ cpu, cpup->irq, new_cpu, cpup->phys_id);
+ }
+ }
+
+ /* Set any unassigned cpu map entries to a IRQ on any phys_id */
+ start_cpu = first_cpu;
+
+ for_each_present_cpu(cpu) {
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+
+ /* Is this entry unassigned */
+ if (cpup->eq == LPFC_VECTOR_MAP_EMPTY) {
+ /* Mark it as IRQ not assigned by the kernel */
+ cpup->flag |= LPFC_CPU_MAP_UNASSIGN;
+
+ /* If so, find a new_cpup thats on ANY phys_id
+ * as the cpup. start_cpu will start where we
+ * left off so all unassigned entries don't get
+ * assigned the IRQ of the first entry.
+ */
+ new_cpu = start_cpu;
+ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+ new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+ if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) &&
+ (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY))
+ goto found_any;
+ new_cpu = cpumask_next(
+ new_cpu, cpu_present_mask);
+ if (new_cpu == nr_cpumask_bits)
+ new_cpu = first_cpu;
+ }
+ /* We should never leave an entry unassigned */
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3339 Set Affinity: CPU %d "
+ "irq %d UNASSIGNED\n",
+ cpup->hdwq, cpup->irq);
+ continue;
+found_any:
+ /* We found an available entry, copy the IRQ info */
+ cpup->eq = new_cpup->eq;
+ cpup->irq = new_cpup->irq;
+
+ /* Bump start_cpu to the next slot to minmize the
+ * chance of having multiple unassigned CPU entries
+ * selecting the same IRQ.
+ */
+ start_cpu = cpumask_next(new_cpu, cpu_present_mask);
+ if (start_cpu == nr_cpumask_bits)
+ start_cpu = first_cpu;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3338 Set Affinity: CPU %d "
+ "irq %d from id %d (%d/%d)\n",
+ cpu, cpup->irq, new_cpu,
+ new_cpup->phys_id, new_cpup->core_id);
+ }
+ }
+
+ /* Assign hdwq indices that are unique across all cpus in the map
+ * that are also FIRST_CPUs.
+ */
+ idx = 0;
+ for_each_present_cpu(cpu) {
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+
+ /* Only FIRST IRQs get a hdwq index assignment. */
+ if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
+ continue;
+
+ /* 1 to 1, the first LPFC_CPU_FIRST_IRQ cpus to a unique hdwq */
+ cpup->hdwq = idx;
+ idx++;
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3333 Set Affinity: CPU %d (phys %d core %d): "
+ "hdwq %d eq %d irq %d flg x%x\n",
+ cpu, cpup->phys_id, cpup->core_id,
+ cpup->hdwq, cpup->eq, cpup->irq, cpup->flag);
+ }
+ /* Finally we need to associate a hdwq with each cpu_map entry
+ * This will be 1 to 1 - hdwq to cpu, unless there are less
+ * hardware queues then CPUs. For that case we will just round-robin
+ * the available hardware queues as they get assigned to CPUs.
+ * The next_idx is the idx from the FIRST_CPU loop above to account
+ * for irq_chann < hdwq. The idx is used for round-robin assignments
+ * and needs to start at 0.
+ */
+ next_idx = idx;
+ start_cpu = 0;
+ idx = 0;
+ for_each_present_cpu(cpu) {
+ cpup = &phba->sli4_hba.cpu_map[cpu];
+
+ /* FIRST cpus are already mapped. */
+ if (cpup->flag & LPFC_CPU_FIRST_IRQ)
+ continue;
+
+ /* If the cfg_irq_chann < cfg_hdw_queue, set the hdwq
+ * of the unassigned cpus to the next idx so that all
+ * hdw queues are fully utilized.
+ */
+ if (next_idx < phba->cfg_hdw_queue) {
+ cpup->hdwq = next_idx;
+ next_idx++;
+ continue;
+ }
+
+ /* Not a First CPU and all hdw_queues are used. Reuse a
+ * Hardware Queue for another CPU, so be smart about it
+ * and pick one that has its IRQ/EQ mapped to the same phys_id
+ * (CPU package) and core_id.
+ */
+ new_cpu = start_cpu;
+ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+ new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+ if (new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY &&
+ new_cpup->phys_id == cpup->phys_id &&
+ new_cpup->core_id == cpup->core_id) {
+ goto found_hdwq;
+ }
+ new_cpu = cpumask_next(new_cpu, cpu_present_mask);
+ if (new_cpu == nr_cpumask_bits)
+ new_cpu = first_cpu;
}
+
+ /* If we can't match both phys_id and core_id,
+ * settle for just a phys_id match.
+ */
+ new_cpu = start_cpu;
+ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+ new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+ if (new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY &&
+ new_cpup->phys_id == cpup->phys_id)
+ goto found_hdwq;
+
+ new_cpu = cpumask_next(new_cpu, cpu_present_mask);
+ if (new_cpu == nr_cpumask_bits)
+ new_cpu = first_cpu;
+ }
+
+ /* Otherwise just round robin on cfg_hdw_queue */
+ cpup->hdwq = idx % phba->cfg_hdw_queue;
+ idx++;
+ goto logit;
+ found_hdwq:
+ /* We found an available entry, copy the IRQ info */
+ start_cpu = cpumask_next(new_cpu, cpu_present_mask);
+ if (start_cpu == nr_cpumask_bits)
+ start_cpu = first_cpu;
+ cpup->hdwq = new_cpup->hdwq;
+ logit:
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3335 Set Affinity: CPU %d (phys %d core %d): "
+ "hdwq %d eq %d irq %d flg x%x\n",
+ cpu, cpup->phys_id, cpup->core_id,
+ cpup->hdwq, cpup->eq, cpup->irq, cpup->flag);
}
+
+ /* The cpu_map array will be used later during initialization
+ * when EQ / CQ / WQs are allocated and configured.
+ */
return;
}
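
Note: the fix-up loops above repeatedly walk the present-CPU mask from a moving start_cpu so that unassigned entries spread across IRQs and hardware queues instead of piling onto the first one. The wrap-around idiom they rely on, shown in isolation:

	first_cpu = cpumask_first(cpu_present_mask);
	new_cpu = start_cpu;
	for (i = 0; i < num_present_cpus(); i++) {
		/* ... examine phba->sli4_hba.cpu_map[new_cpu] ... */
		new_cpu = cpumask_next(new_cpu, cpu_present_mask);
		if (new_cpu >= nr_cpu_ids)	/* ran off the end, wrap */
			new_cpu = first_cpu;
	}
	/* resume the next search one slot further along */
	start_cpu = cpumask_next(new_cpu, cpu_present_mask);
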
@@ -10805,10 +11044,10 @@ vec_fail_out:
* @phba: pointer to lpfc hba data structure.
*
* This routine is invoked to enable the MSI interrupt mode to device with
- * SLI-4 interface spec. The kernel function pci_enable_msi() is called
- * to enable the MSI vector. The device driver is responsible for calling
- * the request_irq() to register MSI vector with a interrupt the handler,
- * which is done in this function.
+ * SLI-4 interface spec. The kernel function pci_alloc_irq_vectors() is
+ * called to enable the MSI vector. The device driver is responsible for
+ * calling the request_irq() to register MSI vector with a interrupt the
+ * handler, which is done in this function.
*
* Return codes
* 0 - successful
@@ -10819,20 +11058,21 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba)
{
int rc, index;
- rc = pci_enable_msi(phba->pcidev);
- if (!rc)
+ rc = pci_alloc_irq_vectors(phba->pcidev, 1, 1,
+ PCI_IRQ_MSI | PCI_IRQ_AFFINITY);
+ if (rc > 0)
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"0487 PCI enable MSI mode success.\n");
else {
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"0488 PCI enable MSI mode failed (%d)\n", rc);
- return rc;
+ return rc ? rc : -1;
}
rc = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler,
0, LPFC_DRIVER_NAME, phba);
if (rc) {
- pci_disable_msi(phba->pcidev);
+ pci_free_irq_vectors(phba->pcidev);
lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
"0490 MSI request_irq failed (%d)\n", rc);
return rc;
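
Note: the MSI path above moves from pci_enable_msi() to pci_alloc_irq_vectors(), which returns the number of vectors allocated (pinned to one here) or a negative errno, with pci_free_irq_vectors() as the matching teardown. A minimal sketch of that flow; my_handler, "mydrv" and drvdata are placeholders:

	int nvec, rc;

	nvec = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_AFFINITY);
	if (nvec < 1)
		return nvec < 0 ? nvec : -ENODEV;

	rc = request_irq(pci_irq_vector(pdev, 0), my_handler, 0, "mydrv", drvdata);
	if (rc)
		pci_free_irq_vectors(pdev);
	return rc;
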
@@ -10998,11 +11238,10 @@ static void
lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
{
struct lpfc_sli4_hdw_queue *qp;
- int idx, ccnt, fcnt;
+ int idx, ccnt;
int wait_time = 0;
int io_xri_cmpl = 1;
int nvmet_xri_cmpl = 1;
- int fcp_xri_cmpl = 1;
int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list);
/* Driver just aborted IOs during the hba_unset process. Pause
@@ -11016,32 +11255,21 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
lpfc_nvme_wait_for_io_drain(phba);
ccnt = 0;
- fcnt = 0;
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
qp = &phba->sli4_hba.hdwq[idx];
- fcp_xri_cmpl = list_empty(
- &qp->lpfc_abts_scsi_buf_list);
- if (!fcp_xri_cmpl) /* if list is NOT empty */
- fcnt++;
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- io_xri_cmpl = list_empty(
- &qp->lpfc_abts_nvme_buf_list);
- if (!io_xri_cmpl) /* if list is NOT empty */
- ccnt++;
- }
+ io_xri_cmpl = list_empty(&qp->lpfc_abts_io_buf_list);
+ if (!io_xri_cmpl) /* if list is NOT empty */
+ ccnt++;
}
if (ccnt)
io_xri_cmpl = 0;
- if (fcnt)
- fcp_xri_cmpl = 0;
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
nvmet_xri_cmpl =
list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
}
- while (!fcp_xri_cmpl || !els_xri_cmpl || !io_xri_cmpl ||
- !nvmet_xri_cmpl) {
+ while (!els_xri_cmpl || !io_xri_cmpl || !nvmet_xri_cmpl) {
if (wait_time > LPFC_XRI_EXCH_BUSY_WAIT_TMO) {
if (!nvmet_xri_cmpl)
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -11050,12 +11278,7 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
wait_time/1000);
if (!io_xri_cmpl)
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "6100 NVME XRI exchange busy "
- "wait time: %d seconds.\n",
- wait_time/1000);
- if (!fcp_xri_cmpl)
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "2877 FCP XRI exchange busy "
+ "6100 IO XRI exchange busy "
"wait time: %d seconds.\n",
wait_time/1000);
if (!els_xri_cmpl)
@@ -11071,24 +11294,15 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
}
ccnt = 0;
- fcnt = 0;
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
qp = &phba->sli4_hba.hdwq[idx];
- fcp_xri_cmpl = list_empty(
- &qp->lpfc_abts_scsi_buf_list);
- if (!fcp_xri_cmpl) /* if list is NOT empty */
- fcnt++;
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- io_xri_cmpl = list_empty(
- &qp->lpfc_abts_nvme_buf_list);
- if (!io_xri_cmpl) /* if list is NOT empty */
- ccnt++;
- }
+ io_xri_cmpl = list_empty(
+ &qp->lpfc_abts_io_buf_list);
+ if (!io_xri_cmpl) /* if list is NOT empty */
+ ccnt++;
}
if (ccnt)
io_xri_cmpl = 0;
- if (fcnt)
- fcp_xri_cmpl = 0;
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
nvmet_xri_cmpl = list_empty(
@@ -11331,24 +11545,56 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
mbx_sli4_parameters);
phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters);
phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters);
- phba->nvme_support = (bf_get(cfg_nvme, mbx_sli4_parameters) &&
- bf_get(cfg_xib, mbx_sli4_parameters));
-
- if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) ||
- !phba->nvme_support) {
- phba->nvme_support = 0;
- phba->nvmet_support = 0;
- phba->cfg_nvmet_mrq = 0;
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME,
- "6101 Disabling NVME support: "
- "Not supported by firmware: %d %d\n",
- bf_get(cfg_nvme, mbx_sli4_parameters),
- bf_get(cfg_xib, mbx_sli4_parameters));
-
- /* If firmware doesn't support NVME, just use SCSI support */
- if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
- return -ENODEV;
- phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP;
+
+ /* Check for Extended Pre-Registered SGL support */
+ phba->cfg_xpsgl = bf_get(cfg_xpsgl, mbx_sli4_parameters);
+
+ /* Check for firmware nvme support */
+ rc = (bf_get(cfg_nvme, mbx_sli4_parameters) &&
+ bf_get(cfg_xib, mbx_sli4_parameters));
+
+ if (rc) {
+ /* Save this to indicate the Firmware supports NVME */
+ sli4_params->nvme = 1;
+
+ /* Firmware NVME support, check driver FC4 NVME support */
+ if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_NVME,
+ "6133 Disabling NVME support: "
+ "FC4 type not supported: x%x\n",
+ phba->cfg_enable_fc4_type);
+ goto fcponly;
+ }
+ } else {
+ /* No firmware NVME support, check driver FC4 NVME support */
+ sli4_params->nvme = 0;
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME,
+ "6101 Disabling NVME support: Not "
+ "supported by firmware (%d %d) x%x\n",
+ bf_get(cfg_nvme, mbx_sli4_parameters),
+ bf_get(cfg_xib, mbx_sli4_parameters),
+ phba->cfg_enable_fc4_type);
+fcponly:
+ phba->nvme_support = 0;
+ phba->nvmet_support = 0;
+ phba->cfg_nvmet_mrq = 0;
+ phba->cfg_nvme_seg_cnt = 0;
+
+ /* If no FC4 type support, move to just SCSI support */
+ if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
+ return -ENODEV;
+ phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP;
+ }
+ }
+
+ /* If the NVME FC4 type is enabled, scale the sg_seg_cnt to
+ * accommodate 512K and 1M IOs in a single nvme buf and supply
+ * enough NVME LS iocb buffers for larger connectivity counts.
+ */
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+ phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
+ phba->cfg_iocb_cnt = 5;
}
/* Only embed PBDE for if_type 6, PBDE support requires xib be set */
@@ -11415,6 +11661,14 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
else
phba->mds_diags_support = 0;
+ /*
+ * Check if the SLI port supports NSLER
+ */
+ if (bf_get(cfg_nsler, mbx_sli4_parameters))
+ phba->nsler = 1;
+ else
+ phba->nsler = 0;
+
return 0;
}
@@ -11843,7 +12097,7 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
lpfc_scsi_dev_block(phba);
/* Flush all driver's outstanding SCSI I/Os as we are to reset */
- lpfc_sli_flush_fcp_rings(phba);
+ lpfc_sli_flush_io_rings(phba);
/* stop all timers */
lpfc_stop_hba_timers(phba);
@@ -11873,7 +12127,7 @@ lpfc_sli_prep_dev_for_perm_failure(struct lpfc_hba *phba)
lpfc_stop_hba_timers(phba);
/* Clean up all driver's outstanding SCSI I/Os */
- lpfc_sli_flush_fcp_rings(phba);
+ lpfc_sli_flush_io_rings(phba);
}
/**
@@ -12645,12 +12899,8 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
/* Block all SCSI devices' I/Os on the host */
lpfc_scsi_dev_block(phba);
- /* Flush all driver's outstanding SCSI I/Os as we are to reset */
- lpfc_sli_flush_fcp_rings(phba);
-
- /* Flush the outstanding NVME IOs if fc4 type enabled. */
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
- lpfc_sli_flush_nvme_rings(phba);
+ /* Flush all driver's outstanding I/Os as we are to reset */
+ lpfc_sli_flush_io_rings(phba);
/* stop all timers */
lpfc_stop_hba_timers(phba);
@@ -12681,12 +12931,8 @@ lpfc_sli4_prep_dev_for_perm_failure(struct lpfc_hba *phba)
/* stop all timers */
lpfc_stop_hba_timers(phba);
- /* Clean up all driver's outstanding SCSI I/Os */
- lpfc_sli_flush_fcp_rings(phba);
-
- /* Flush the outstanding NVME IOs if fc4 type enabled. */
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
- lpfc_sli_flush_nvme_rings(phba);
+ /* Clean up all driver's outstanding I/Os */
+ lpfc_sli_flush_io_rings(phba);
}
/**
@@ -13227,19 +13473,6 @@ lpfc_exit(void)
pci_unregister_driver(&lpfc_driver);
fc_release_transport(lpfc_transport_template);
fc_release_transport(lpfc_vport_transport_template);
- if (_dump_buf_data) {
- printk(KERN_ERR "9062 BLKGRD: freeing %lu pages for "
- "_dump_buf_data at 0x%p\n",
- (1L << _dump_buf_data_order), _dump_buf_data);
- free_pages((unsigned long)_dump_buf_data, _dump_buf_data_order);
- }
-
- if (_dump_buf_dif) {
- printk(KERN_ERR "9049 BLKGRD: freeing %lu pages for "
- "_dump_buf_dif at 0x%p\n",
- (1L << _dump_buf_dif_order), _dump_buf_dif);
- free_pages((unsigned long)_dump_buf_dif, _dump_buf_dif_order);
- }
idr_destroy(&lpfc_hba_index);
}