summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/rdma.c
diff options
context:
space:
mode:
authorKeith Busch <kbusch@kernel.org>2023-10-27 10:58:12 -0700
committerKeith Busch <kbusch@kernel.org>2023-12-04 08:39:03 -0800
commite6e7f7ac03e40795346f1b2994a05f507ad8d345 (patch)
tree3e48f621f67d05ce07d4c6f71b6ac16a1f346206 /drivers/nvme/host/rdma.c
parent5c687c287c46fadb14644091823298875a5216aa (diff)
nvme: ensure reset state check ordering
A different CPU may be setting the ctrl->state value, so ensure proper barriers to prevent optimizing to a stale state. Normally it isn't a problem to observe the wrong state as it is merely advisory to take a quicker path during initialization and error recovery, but seeing an old state can report unexpected ENETRESET errors when a reset request was in fact successful. Reported-by: Minh Hoang <mh2022@meta.com> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Keith Busch <kbusch@kernel.org> Signed-off-by: Hannes Reinecke <hare@suse.de>
Diffstat (limited to 'drivers/nvme/host/rdma.c')
-rw-r--r--drivers/nvme/host/rdma.c23
1 files changed, 14 insertions, 9 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6d178d555920..81e2621169e5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -984,10 +984,11 @@ free_ctrl:
static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
{
+ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
/* If we are resetting/deleting then do nothing */
- if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
- WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
- ctrl->ctrl.state == NVME_CTRL_LIVE);
+ if (state != NVME_CTRL_CONNECTING) {
+ WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
return;
}
@@ -1059,8 +1060,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
* unless we're during creation of a new controller to
* avoid races with teardown flow.
*/
- WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
- ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+ WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+ state != NVME_CTRL_DELETING_NOIO);
WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
@@ -1129,8 +1132,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
- WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
- ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+ WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+ state != NVME_CTRL_DELETING_NOIO);
return;
}
@@ -1162,7 +1167,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
struct nvme_rdma_queue *queue = wc->qp->qp_context;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
- if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+ if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
dev_info(ctrl->ctrl.device,
"%s for CQE 0x%p failed with status %s (%d)\n",
op, wc->wr_cqe,
@@ -1945,7 +1950,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue));
- if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+ if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
/*
* If we are resetting, connecting or deleting we should
* complete immediately because we may block controller