summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Metzmacher <metze@samba.org>2025-10-12 21:10:30 +0200
committerSteve French <stfrench@microsoft.com>2025-10-15 07:44:17 -0500
commitb0432201a11b3caaeca6c03f2b3e399275b2e489 (patch)
tree57bd72fdea671353028340d4797bf9825e1f3a83
parent1ef0e16c3d7ca07432987840d8eef1a9ffb67dec (diff)
smb: client: let destroy_mr_list() keep smbdirect_mr_io memory if registered
If a smbdirect_mr_io structure if still visible to callers of smbd_register_mr() we can't free the related memory when the connection is disconnected! Otherwise smbd_deregister_mr() will crash. Now we use a mutex and refcounting in order to keep the memory around if the connection is disconnected. It means smbd_deregister_mr() can be called at any later time to free the memory, which is no longer referenced by nor referencing the connection. It also means smbd_destroy() no longer needs to wait for mr_io.used.count to become 0. Fixes: 050b8c374019 ("smbd: Make upper layer decide when to destroy the transport") Cc: Steve French <smfrench@gmail.com> Cc: Tom Talpey <tom@talpey.com> Cc: Long Li <longli@microsoft.com> Cc: Namjae Jeon <linkinjeon@kernel.org> Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher <metze@samba.org> Signed-off-by: Steve French <stfrench@microsoft.com>
-rw-r--r--fs/smb/client/smbdirect.c146
1 files changed, 127 insertions, 19 deletions
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index c3330e43488f..77de85d7cdc3 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -1624,19 +1624,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
log_rdma_event(INFO, "free receive buffers\n");
destroy_receive_buffers(sc);
- /*
- * For performance reasons, memory registration and deregistration
- * are not locked by srv_mutex. It is possible some processes are
- * blocked on transport srv_mutex while holding memory registration.
- * Release the transport srv_mutex to allow them to hit the failure
- * path when sending data, and then release memory registrations.
- */
log_rdma_event(INFO, "freeing mr list\n");
- while (atomic_read(&sc->mr_io.used.count)) {
- cifs_server_unlock(server);
- msleep(1000);
- cifs_server_lock(server);
- }
destroy_mr_list(sc);
ib_free_cq(sc->ib.send_cq);
@@ -2352,6 +2340,46 @@ static void smbd_mr_recovery_work(struct work_struct *work)
}
}
+static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr)
+{
+ struct smbdirect_socket *sc = mr->socket;
+
+ lockdep_assert_held(&mr->mutex);
+
+ if (mr->state == SMBDIRECT_MR_DISABLED)
+ return;
+
+ if (mr->mr)
+ ib_dereg_mr(mr->mr);
+ if (mr->sgt.nents)
+ ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
+ kfree(mr->sgt.sgl);
+
+ mr->mr = NULL;
+ mr->sgt.sgl = NULL;
+ mr->sgt.nents = 0;
+
+ mr->state = SMBDIRECT_MR_DISABLED;
+}
+
+static void smbd_mr_free_locked(struct kref *kref)
+{
+ struct smbdirect_mr_io *mr =
+ container_of(kref, struct smbdirect_mr_io, kref);
+
+ lockdep_assert_held(&mr->mutex);
+
+ /*
+ * smbd_mr_disable_locked() should already be called!
+ */
+ if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED))
+ smbd_mr_disable_locked(mr);
+
+ mutex_unlock(&mr->mutex);
+ mutex_destroy(&mr->mutex);
+ kfree(mr);
+}
+
static void destroy_mr_list(struct smbdirect_socket *sc)
{
struct smbdirect_mr_io *mr, *tmp;
@@ -2365,13 +2393,31 @@ static void destroy_mr_list(struct smbdirect_socket *sc)
spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
list_for_each_entry_safe(mr, tmp, &all_list, list) {
- if (mr->mr)
- ib_dereg_mr(mr->mr);
- if (mr->sgt.nents)
- ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
- kfree(mr->sgt.sgl);
+ mutex_lock(&mr->mutex);
+
+ smbd_mr_disable_locked(mr);
list_del(&mr->list);
- kfree(mr);
+ mr->socket = NULL;
+
+ /*
+ * No kref_put_mutex() as it's already locked.
+ *
+ * If smbd_mr_free_locked() is called
+ * and the mutex is unlocked and mr is gone,
+ * in that case kref_put() returned 1.
+ *
+ * If kref_put() returned 0 we know that
+ * smbd_mr_free_locked() didn't
+ * run. Not by us nor by anyone else, as we
+ * still hold the mutex, so we need to unlock.
+ *
+ * If the mr is still registered it will
+ * be dangling (detached from the connection
+ * waiting for smbd_deregister_mr() to be
+ * called in order to free the memory.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
}
}
@@ -2402,6 +2448,9 @@ static int allocate_mr_list(struct smbdirect_socket *sc)
goto kzalloc_mr_failed;
}
+ kref_init(&mr->kref);
+ mutex_init(&mr->mutex);
+
mr->mr = ib_alloc_mr(sc->ib.pd,
sc->mr_io.type,
sp->max_frmr_depth);
@@ -2434,6 +2483,7 @@ static int allocate_mr_list(struct smbdirect_socket *sc)
kcalloc_sgl_failed:
ib_dereg_mr(mr->mr);
ib_alloc_mr_failed:
+ mutex_destroy(&mr->mutex);
kfree(mr);
kzalloc_mr_failed:
destroy_mr_list(sc);
@@ -2471,6 +2521,7 @@ again:
list_for_each_entry(ret, &sc->mr_io.all.list, list) {
if (ret->state == SMBDIRECT_MR_READY) {
ret->state = SMBDIRECT_MR_REGISTERED;
+ kref_get(&ret->kref);
spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
atomic_dec(&sc->mr_io.ready.count);
atomic_inc(&sc->mr_io.used.count);
@@ -2535,6 +2586,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
return NULL;
}
+ mutex_lock(&mr->mutex);
+
mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
mr->need_invalidate = need_invalidate;
mr->sgt.nents = 0;
@@ -2578,8 +2631,16 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
* on the next ib_post_send when we actually send I/O to remote peer
*/
rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
- if (!rc)
+ if (!rc) {
+ /*
+ * get_mr() gave us a reference
+ * via kref_get(&mr->kref), we keep that and let
+ * the caller use smbd_deregister_mr()
+ * to remove it again.
+ */
+ mutex_unlock(&mr->mutex);
return mr;
+ }
log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
rc, reg_wr->key);
@@ -2596,6 +2657,25 @@ dma_map_error:
smbd_disconnect_rdma_connection(sc);
+ /*
+ * get_mr() gave us a reference
+ * via kref_get(&mr->kref), we need to remove it again
+ * on error.
+ *
+ * No kref_put_mutex() as it's already locked.
+ *
+ * If smbd_mr_free_locked() is called
+ * and the mutex is unlocked and mr is gone,
+ * in that case kref_put() returned 1.
+ *
+ * If kref_put() returned 0 we know that
+ * smbd_mr_free_locked() didn't
+ * run. Not by us nor by anyone else, as we
+ * still hold the mutex, so we need to unlock.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
+
return NULL;
}
@@ -2624,6 +2704,15 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
{
struct smbdirect_socket *sc = mr->socket;
+ mutex_lock(&mr->mutex);
+ if (mr->state == SMBDIRECT_MR_DISABLED)
+ goto put_kref;
+
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
+ smbd_mr_disable_locked(mr);
+ goto put_kref;
+ }
+
if (mr->need_invalidate) {
struct ib_send_wr *wr = &mr->inv_wr;
int rc;
@@ -2640,6 +2729,7 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
rc = ib_post_send(sc->ib.qp, wr, NULL);
if (rc) {
log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
+ smbd_mr_disable_locked(mr);
smbd_disconnect_rdma_connection(sc);
goto done;
}
@@ -2671,6 +2761,24 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr)
done:
if (atomic_dec_and_test(&sc->mr_io.used.count))
wake_up(&sc->mr_io.cleanup.wait_queue);
+
+put_kref:
+ /*
+ * No kref_put_mutex() as it's already locked.
+ *
+ * If smbd_mr_free_locked() is called
+ * and the mutex is unlocked and mr is gone,
+ * in that case kref_put() returned 1.
+ *
+ * If kref_put() returned 0 we know that
+ * smbd_mr_free_locked() didn't
+ * run. Not by us nor by anyone else, as we
+ * still hold the mutex, so we need to unlock
+ * and keep the mr in SMBDIRECT_MR_READY or
+ * SMBDIRECT_MR_ERROR state.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
}
static bool smb_set_sge(struct smb_extract_to_rdma *rdma,