diff options
author | James Smart <jsmart2021@gmail.com> | 2021-05-14 12:55:49 -0700 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2021-05-21 23:23:27 -0400 |
commit | 01131e7aae5d30e23e3cdd1eebe51bbc5489ae8f (patch) | |
tree | 07a7e20f94ff8c4b023ca6a2abf1ae2edc224f3d /drivers/scsi/lpfc/lpfc_els.c | |
parent | ee8868c5c78f16fb726775741aeab8a233373332 (diff) |
scsi: lpfc: Fix unreleased RPIs when NPIV ports are created
While testing NPIV and watching logins and used RPI levels, it was seen the
used RPI count was much higher than the number of remote ports discovered.
Code inspection showed that remote port removals on any NPIV instance are
releasing the RPI, but not performing an UNREG_RPI with the adapter thus
the reference counting never fully drops and the RPI is never fully
released. This was happening on NPIV nodes due to a log of fabric ELS's to
fabric addresses. This lack of UNREG_RPI was introduced by a prior node
rework patch that performed the UNREG_RPI as part of node cleanup.
To resolve the issue, do the following:
- Restore the RPI release code, but move the location to so that it is in
line with the new node cleanup design.
- NPIV ports now release the RPI and drop the node when the caller sets
the NLP_RELEASE_RPI flag.
- Set the NLP_RELEASE_RPI flag in node cleanup which will trigger a
release of RPI to free pool.
- Ensure there's an UNREG_RPI at LOGO completion so that RPI release is
completed.
- Stop offline_prep from skipping nodes that are UNUSED. The RPI may
not have been released.
- Stop the default RPI handling in lpfc_cmpl_els_rsp() for SLI4.
- Fixed up debugfs RPI displays for better debugging.
Fixes: a70e63eee1c1 ("scsi: lpfc: Fix NPIV Fabric Node reference counting")
Link: https://lore.kernel.org/r/20210514195559.119853-2-jsmart2021@gmail.com
Cc: <stable@vger.kernel.org> # v5.11+
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_els.c')
-rw-r--r-- | drivers/scsi/lpfc/lpfc_els.c | 79 |
1 files changed, 63 insertions, 16 deletions
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 21108f322c99..a0ff15e63109 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2869,6 +2869,11 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * log into the remote port. */ if (ndlp->nlp_flag & NLP_TARGET_REMOVE) { + spin_lock_irq(&ndlp->lock); + if (phba->sli_rev == LPFC_SLI_REV4) + ndlp->nlp_flag |= NLP_RELEASE_RPI; + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + spin_unlock_irq(&ndlp->lock); lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_DEVICE_RM); lpfc_els_free_iocb(phba, cmdiocb); @@ -4371,6 +4376,7 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; struct lpfc_vport *vport = cmdiocb->vport; IOCB_t *irsp; + u32 xpt_flags = 0, did_mask = 0; irsp = &rspiocb->iocb; lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, @@ -4386,9 +4392,20 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (ndlp->nlp_state == NLP_STE_NPR_NODE) { /* NPort Recovery mode or node is just allocated */ if (!lpfc_nlp_not_used(ndlp)) { - /* If the ndlp is being used by another discovery - * thread, just unregister the RPI. + /* A LOGO is completing and the node is in NPR state. + * If this a fabric node that cleared its transport + * registration, release the rpi. */ + xpt_flags = SCSI_XPT_REGD | NVME_XPT_REGD; + did_mask = ndlp->nlp_DID & Fabric_DID_MASK; + if (did_mask == Fabric_DID_MASK && + !(ndlp->fc4_xpt_flags & xpt_flags)) { + spin_lock_irq(&ndlp->lock); + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + if (phba->sli_rev == LPFC_SLI_REV4) + ndlp->nlp_flag |= NLP_RELEASE_RPI; + spin_unlock_irq(&ndlp->lock); + } lpfc_unreg_rpi(vport, ndlp); } else { /* Indicate the node has already released, should @@ -4424,28 +4441,37 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + u32 mbx_flag = pmb->mbox_flag; + u32 mbx_cmd = pmb->u.mb.mbxCommand; pmb->ctx_buf = NULL; pmb->ctx_ndlp = NULL; - lpfc_mbuf_free(phba, mp->virt, mp->phys); - kfree(mp); - mempool_free(pmb, phba->mbox_mem_pool); if (ndlp) { lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, - "0006 rpi x%x DID:%x flg:%x %d x%px\n", + "0006 rpi x%x DID:%x flg:%x %d x%px " + "mbx_cmd x%x mbx_flag x%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, - kref_read(&ndlp->kref), - ndlp); - /* This is the end of the default RPI cleanup logic for - * this ndlp and it could get released. Clear the nlp_flags to - * prevent any further processing. + kref_read(&ndlp->kref), ndlp, mbx_cmd, + mbx_flag, pmb); + + /* This ends the default/temporary RPI cleanup logic for this + * ndlp and the node and rpi needs to be released. Free the rpi + * first on an UNREG_LOGIN and then release the final + * references. */ + spin_lock_irq(&ndlp->lock); ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; + if (mbx_cmd == MBX_UNREG_LOGIN) + ndlp->nlp_flag &= ~NLP_UNREG_INP; + spin_unlock_irq(&ndlp->lock); lpfc_nlp_put(ndlp); - lpfc_nlp_not_used(ndlp); + lpfc_drop_node(ndlp->vport, ndlp); } + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + mempool_free(pmb, phba->mbox_mem_pool); return; } @@ -4503,11 +4529,11 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* ELS response tag <ulpIoTag> completes */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "0110 ELS response tag x%x completes " - "Data: x%x x%x x%x x%x x%x x%x x%x\n", + "Data: x%x x%x x%x x%x x%x x%x x%x x%x x%px\n", cmdiocb->iocb.ulpIoTag, rspiocb->iocb.ulpStatus, rspiocb->iocb.un.ulpWord[4], rspiocb->iocb.ulpTimeout, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, - ndlp->nlp_rpi); + ndlp->nlp_rpi, kref_read(&ndlp->kref), mbox); if (mbox) { if ((rspiocb->iocb.ulpStatus == 0) && (ndlp->nlp_flag & NLP_ACC_REGLOGIN)) { @@ -4587,6 +4613,16 @@ out: spin_unlock_irq(&ndlp->lock); } + /* An SLI4 NPIV instance wants to drop the node at this point under + * these conditions and release the RPI. + */ + if (phba->sli_rev == LPFC_SLI_REV4 && + (vport && vport->port_type == LPFC_NPIV_PORT) && + ndlp->nlp_flag & NLP_RELEASE_RPI) { + lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi); + lpfc_drop_node(vport, ndlp); + } + /* Release the originating I/O reference. */ lpfc_els_free_iocb(phba, cmdiocb); lpfc_nlp_put(ndlp); @@ -4775,10 +4811,10 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "0128 Xmit ELS ACC response Status: x%x, IoTag: x%x, " "XRI: x%x, DID: x%x, nlp_flag: x%x nlp_state: x%x " - "RPI: x%x, fc_flag x%x\n", + "RPI: x%x, fc_flag x%x refcnt %d\n", rc, elsiocb->iotag, elsiocb->sli4_xritag, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, - ndlp->nlp_rpi, vport->fc_flag); + ndlp->nlp_rpi, vport->fc_flag, kref_read(&ndlp->kref)); return 0; } @@ -4856,6 +4892,17 @@ lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError, return 1; } + /* The NPIV instance is rejecting this unsolicited ELS. Make sure the + * node's assigned RPI needs to be released as this node will get + * freed. + */ + if (phba->sli_rev == LPFC_SLI_REV4 && + vport->port_type == LPFC_NPIV_PORT) { + spin_lock_irq(&ndlp->lock); + ndlp->nlp_flag |= NLP_RELEASE_RPI; + spin_unlock_irq(&ndlp->lock); + } + rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0); if (rc == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); |