| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-29 14:57:08 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-29 14:57:08 -0700 |
| commit | a9401710a5f5681abd2a6f21f9e76bc9f2e81891 | |
| tree | 37cecdacdfd7edaa7a68c64da360b2b25b03fdf3 | |
| parent | e445fba2d76369d72b497ecadf6b9787930693d9 | |
| parent | 1b53426334c3c942db47e0959a2527a4f815af50 | |
Merge tag 'v6.18-rc-part1-smb3-common' of git://git.samba.org/ksmbd
Pull smb restructuring updates from Steve French:
"Large set of small restructuring smbdirect related patches for cifs.ko
and ksmbd.ko.
This is the next step in order to use common structures for smbdirect
handling across both modules. It also includes improved handling of
broken connections, as well as fixes for the negotiation of rdma
resources.
Moving to common functions is planned for 6.19, as well as
providing smbdirect via sockets to userspace (e.g. for samba to also
be able to use smbdirect for userspace server and userspace client
tools).
This was heavily reviewed and tested at the recent SMB3.1.1 test event
at SDC"
* tag 'v6.18-rc-part1-smb3-common' of git://git.samba.org/ksmbd: (159 commits)
smb: server: let smb_direct_flush_send_list() invalidate a remote key first
smb: server: make use of ib_alloc_cq_any() instead of ib_alloc_cq()
smb: server: make consistent use of spin_lock_irq{save,restore}() in transport_rdma.c
smb: server: let {free_transport,smb_direct_disconnect_rdma_{work,connection}}() wake up all wait queues
smb: server: let smb_direct_disconnect_rdma_connection() disable all work but disconnect_work
smb: server: fill in smbdirect_socket.first_error on error
smb: server: let smb_direct_disconnect_rdma_connection() set SMBDIRECT_SOCKET_ERROR...
smb: server: pass struct smbdirect_socket to smb_direct_send_negotiate_response()
smb: server: pass struct smbdirect_socket to {enqueue,get_first}_reassembly()
smb: server: pass struct smbdirect_socket to smb_direct_post_send_data()
smb: server: pass struct smbdirect_socket to post_sendmsg()
smb: server: pass struct smbdirect_socket to smb_direct_create_header()
smb: server: pass struct smbdirect_socket to manage_keep_alive_before_sending()
smb: server: pass struct smbdirect_socket to manage_credits_prior_sending()
smb: server: pass struct smbdirect_socket to calc_rw_credits()
smb: server: pass struct smbdirect_socket to wait_for_rw_credits()
smb: server: pass struct smbdirect_socket to wait_for_send_credits()
smb: server: pass struct smbdirect_socket to wait_for_credits()
smb: server: pass struct smbdirect_socket to smb_direct_flush_send_list()
smb: server: pass struct smbdirect_socket to smb_direct_post_send()
...
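
Most of the 159 commits are this kind of mechanical conversion: helpers that used to take the module-specific wrapper now take the shared socket. One representative before/after, lifted from the smbdirect.c hunk below:

```c
/* before: tied to the cifs.ko wrapper */
static int smbd_post_recv(struct smbd_connection *info,
			  struct smbdirect_recv_io *response);

/* after: reusable once both modules embed struct smbdirect_socket */
static int smbd_post_recv(struct smbdirect_socket *sc,
			  struct smbdirect_recv_io *response);
```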
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | fs/smb/client/cifs_debug.c | 81 |
| -rw-r--r-- | fs/smb/client/cifsglob.h | 9 |
| -rw-r--r-- | fs/smb/client/file.c | 16 |
| -rw-r--r-- | fs/smb/client/smb2ops.c | 8 |
| -rw-r--r-- | fs/smb/client/smb2pdu.c | 2 |
| -rw-r--r-- | fs/smb/client/smbdirect.c | 1199 |
| -rw-r--r-- | fs/smb/client/smbdirect.h | 102 |
| -rw-r--r-- | fs/smb/common/smbdirect/smbdirect.h | 7 |
| -rw-r--r-- | fs/smb/common/smbdirect/smbdirect_socket.h | 319 |
| -rw-r--r-- | fs/smb/server/connection.c | 4 |
| -rw-r--r-- | fs/smb/server/connection.h | 10 |
| -rw-r--r-- | fs/smb/server/server.c | 1 |
| -rw-r--r-- | fs/smb/server/smb2pdu.c | 23 |
| -rw-r--r-- | fs/smb/server/smb2pdu.h | 6 |
| -rw-r--r-- | fs/smb/server/transport_rdma.c | 1615 |
| -rw-r--r-- | fs/smb/server/transport_rdma.h | 45 |
16 files changed, 2061 insertions, 1386 deletions
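
One behavioral detail worth noting before the per-file hunks: the RDMA offload threshold moves from smbd_connection to struct TCP_Server_Info. A minimal sketch of the decision it controls, condensed from the cifsglob.h and smb2pdu.c hunks below (the elided surrounding checks are an assumption for brevity):

```c
/*
 * Minimal sketch of the offload decision; the threshold now lives on
 * struct TCP_Server_Info rather than on smbd_connection. Checks other
 * than the threshold comparison are elided here.
 */
static bool smb3_use_rdma_offload(struct cifs_io_parms *io_parms)
{
	struct TCP_Server_Info *server = io_parms->server;

	/* ... rdma/connection capability checks elided ... */

	/* offload also has its overhead, so only do it if desired */
	if (io_parms->length < server->rdma_readwrite_threshold)
		return false;	/* small I/O: inline RDMA send/recv */

	return true;		/* large I/O: RDMA read/write via MR */
}
```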
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 2337cf795db3..35c4d27d2cc0 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -24,6 +24,7 @@ #endif #ifdef CONFIG_CIFS_SMB_DIRECT #include "smbdirect.h" +#include "../common/smbdirect/smbdirect_pdu.h" #endif #include "cifs_swn.h" #include "cached_dir.h" @@ -456,57 +457,55 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) sc = &server->smbd_conn->socket; sp = &sc->parameters; - seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " - "transport status: %x", - server->smbd_conn->protocol, - server->smbd_conn->socket.status); - seq_printf(m, "\nConn receive_credit_max: %x " - "send_credit_target: %x max_send_size: %x", + seq_printf(m, "\nSMBDirect protocol version: 0x%x " + "transport status: %s (%u)", + SMBDIRECT_V1, + smbdirect_socket_status_string(sc->status), + sc->status); + seq_printf(m, "\nConn receive_credit_max: %u " + "send_credit_target: %u max_send_size: %u", sp->recv_credit_max, sp->send_credit_target, sp->max_send_size); - seq_printf(m, "\nConn max_fragmented_recv_size: %x " - "max_fragmented_send_size: %x max_receive_size:%x", + seq_printf(m, "\nConn max_fragmented_recv_size: %u " + "max_fragmented_send_size: %u max_receive_size:%u", sp->max_fragmented_recv_size, sp->max_fragmented_send_size, sp->max_recv_size); - seq_printf(m, "\nConn keep_alive_interval: %x " - "max_readwrite_size: %x rdma_readwrite_threshold: %x", + seq_printf(m, "\nConn keep_alive_interval: %u " + "max_readwrite_size: %u rdma_readwrite_threshold: %u", sp->keepalive_interval_msec * 1000, sp->max_read_write_size, - server->smbd_conn->rdma_readwrite_threshold); - seq_printf(m, "\nDebug count_get_receive_buffer: %x " - "count_put_receive_buffer: %x count_send_empty: %x", - server->smbd_conn->count_get_receive_buffer, - server->smbd_conn->count_put_receive_buffer, - server->smbd_conn->count_send_empty); - seq_printf(m, "\nRead Queue count_reassembly_queue: %x " - "count_enqueue_reassembly_queue: %x " - "count_dequeue_reassembly_queue: %x " - "reassembly_data_length: %x " - "reassembly_queue_length: %x", - server->smbd_conn->count_reassembly_queue, - server->smbd_conn->count_enqueue_reassembly_queue, - server->smbd_conn->count_dequeue_reassembly_queue, + server->rdma_readwrite_threshold); + seq_printf(m, "\nDebug count_get_receive_buffer: %llu " + "count_put_receive_buffer: %llu count_send_empty: %llu", + sc->statistics.get_receive_buffer, + sc->statistics.put_receive_buffer, + sc->statistics.send_empty); + seq_printf(m, "\nRead Queue " + "count_enqueue_reassembly_queue: %llu " + "count_dequeue_reassembly_queue: %llu " + "reassembly_data_length: %u " + "reassembly_queue_length: %u", + sc->statistics.enqueue_reassembly_queue, + sc->statistics.dequeue_reassembly_queue, sc->recv_io.reassembly.data_length, sc->recv_io.reassembly.queue_length); - seq_printf(m, "\nCurrent Credits send_credits: %x " - "receive_credits: %x receive_credit_target: %x", - atomic_read(&server->smbd_conn->send_credits), - atomic_read(&server->smbd_conn->receive_credits), - server->smbd_conn->receive_credit_target); - seq_printf(m, "\nPending send_pending: %x ", - atomic_read(&server->smbd_conn->send_pending)); - seq_printf(m, "\nReceive buffers count_receive_queue: %x ", - server->smbd_conn->count_receive_queue); - seq_printf(m, "\nMR responder_resources: %x " - "max_frmr_depth: %x mr_type: %x", - server->smbd_conn->responder_resources, - server->smbd_conn->max_frmr_depth, - server->smbd_conn->mr_type); - seq_printf(m, 
"\nMR mr_ready_count: %x mr_used_count: %x", - atomic_read(&server->smbd_conn->mr_ready_count), - atomic_read(&server->smbd_conn->mr_used_count)); + seq_printf(m, "\nCurrent Credits send_credits: %u " + "receive_credits: %u receive_credit_target: %u", + atomic_read(&sc->send_io.credits.count), + atomic_read(&sc->recv_io.credits.count), + sc->recv_io.credits.target); + seq_printf(m, "\nPending send_pending: %u ", + atomic_read(&sc->send_io.pending.count)); + seq_printf(m, "\nMR responder_resources: %u " + "max_frmr_depth: %u mr_type: 0x%x", + sp->responder_resources, + sp->max_frmr_depth, + sc->mr_io.type); + seq_printf(m, "\nMR mr_ready_count: %u mr_used_count: %u", + atomic_read(&sc->mr_io.ready.count), + atomic_read(&sc->mr_io.used.count)); skip_rdma: #endif seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x", diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 0fae95cf81c4..3ac254e123dc 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -814,6 +814,13 @@ struct TCP_Server_Info { unsigned int max_read; unsigned int max_write; unsigned int min_offload; + /* + * If payload is less than or equal to the threshold, + * use RDMA send/recv to send upper layer I/O. + * If payload is more than the threshold, + * use RDMA read/write through memory registration for I/O. + */ + unsigned int rdma_readwrite_threshold; unsigned int retrans; struct { bool requested; /* "compress" mount option set*/ @@ -1540,7 +1547,7 @@ struct cifs_io_subrequest { struct kvec iov[2]; struct TCP_Server_Info *server; #ifdef CONFIG_CIFS_SMB_DIRECT - struct smbd_mr *mr; + struct smbdirect_mr_io *mr; #endif struct cifs_credits credits; }; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index cb907e18cc35..a5ed742afa00 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -97,8 +97,12 @@ retry: cifs_trace_rw_credits_write_prepare); #ifdef CONFIG_CIFS_SMB_DIRECT - if (server->smbd_conn) - stream->sreq_max_segs = server->smbd_conn->max_frmr_depth; + if (server->smbd_conn) { + const struct smbdirect_socket_parameters *sp = + smbd_get_parameters(server->smbd_conn); + + stream->sreq_max_segs = sp->max_frmr_depth; + } #endif } @@ -187,8 +191,12 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) cifs_trace_rw_credits_read_submit); #ifdef CONFIG_CIFS_SMB_DIRECT - if (server->smbd_conn) - rreq->io_streams[0].sreq_max_segs = server->smbd_conn->max_frmr_depth; + if (server->smbd_conn) { + const struct smbdirect_socket_parameters *sp = + smbd_get_parameters(server->smbd_conn); + + rreq->io_streams[0].sreq_max_segs = sp->max_frmr_depth; + } #endif return 0; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index e586f3f4b5c9..4711a23c5b38 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -504,8 +504,8 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) wsize = min_t(unsigned int, wsize, server->max_write); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { - struct smbdirect_socket_parameters *sp = - &server->smbd_conn->socket.parameters; + const struct smbdirect_socket_parameters *sp = + smbd_get_parameters(server->smbd_conn); if (server->sign) /* @@ -555,8 +555,8 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) rsize = min_t(unsigned int, rsize, server->max_read); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { - struct smbdirect_socket_parameters *sp = - &server->smbd_conn->socket.parameters; + const struct smbdirect_socket_parameters *sp = + 
smbd_get_parameters(server->smbd_conn); if (server->sign) /* diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c3b9d3f6210f..1c63d2c9cc9c 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4411,7 +4411,7 @@ static inline bool smb3_use_rdma_offload(struct cifs_io_parms *io_parms) return false; /* offload also has its overhead, so only do it if desired */ - if (io_parms->length < server->smbd_conn->rdma_readwrite_threshold) + if (io_parms->length < server->rdma_readwrite_threshold) return false; return true; diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index e0fce5033004..316f398c70f4 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -13,28 +13,35 @@ #include "cifsproto.h" #include "smb2proto.h" +const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn) +{ + struct smbdirect_socket *sc = &conn->socket; + + return &sc->parameters; +} + static struct smbdirect_recv_io *get_receive_buffer( - struct smbd_connection *info); + struct smbdirect_socket *sc); static void put_receive_buffer( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct smbdirect_recv_io *response); -static int allocate_receive_buffers(struct smbd_connection *info, int num_buf); -static void destroy_receive_buffers(struct smbd_connection *info); +static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf); +static void destroy_receive_buffers(struct smbdirect_socket *sc); static void enqueue_reassembly( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct smbdirect_recv_io *response, int data_length); static struct smbdirect_recv_io *_get_first_reassembly( - struct smbd_connection *info); + struct smbdirect_socket *sc); static int smbd_post_recv( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct smbdirect_recv_io *response); -static int smbd_post_send_empty(struct smbd_connection *info); +static int smbd_post_send_empty(struct smbdirect_socket *sc); -static void destroy_mr_list(struct smbd_connection *info); -static int allocate_mr_list(struct smbd_connection *info); +static void destroy_mr_list(struct smbdirect_socket *sc); +static int allocate_mr_list(struct smbdirect_socket *sc); struct smb_extract_to_rdma { struct ib_sge *sge; @@ -57,6 +64,9 @@ static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, /* SMBD negotiation timeout in seconds */ #define SMBD_NEGOTIATE_TIMEOUT 120 +/* The timeout to wait for a keepalive message from peer in seconds */ +#define KEEPALIVE_RECV_TIMEOUT 5 + /* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */ #define SMBD_MIN_RECEIVE_SIZE 128 #define SMBD_MIN_FRAGMENTED_SIZE 131072 @@ -155,65 +165,277 @@ do { \ #define log_rdma_mr(level, fmt, args...) \ log_rdma(level, LOG_RDMA_MR, fmt, ##args) +static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc) +{ + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. 
+ */ + wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.credits.wait_queue); + wake_up_all(&sc->send_io.pending.dec_wait_queue); + wake_up_all(&sc->send_io.pending.zero_wait_queue); + wake_up_all(&sc->recv_io.reassembly.wait_queue); + wake_up_all(&sc->mr_io.ready.wait_queue); + wake_up_all(&sc->mr_io.cleanup.wait_queue); +} + static void smbd_disconnect_rdma_work(struct work_struct *work) { - struct smbd_connection *info = - container_of(work, struct smbd_connection, disconnect_work); - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, disconnect_work); - if (sc->status == SMBDIRECT_SOCKET_CONNECTED) { + /* + * make sure this and other work is not queued again + * but here we don't block and avoid + * disable[_delayed]_work_sync() + */ + disable_work(&sc->disconnect_work); + disable_work(&sc->recv_io.posted.refill_work); + disable_work(&sc->mr_io.recovery_work); + disable_work(&sc->idle.immediate_work); + disable_delayed_work(&sc->idle.timer_work); + + if (sc->first_error == 0) + sc->first_error = -ECONNABORTED; + + switch (sc->status) { + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + case SMBDIRECT_SOCKET_CONNECTED: + case SMBDIRECT_SOCKET_ERROR: sc->status = SMBDIRECT_SOCKET_DISCONNECTING; rdma_disconnect(sc->rdma.cm_id); + break; + + case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + /* + * rdma_connect() never reached + * RDMA_CM_EVENT_ESTABLISHED + */ + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; + + case SMBDIRECT_SOCKET_DISCONNECTING: + case SMBDIRECT_SOCKET_DISCONNECTED: + case SMBDIRECT_SOCKET_DESTROYED: + break; } + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. 
+ */ + smbd_disconnect_wake_up_all(sc); } -static void smbd_disconnect_rdma_connection(struct smbd_connection *info) +static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) { - queue_work(info->workqueue, &info->disconnect_work); + /* + * make sure other work (than disconnect_work) is + * not queued again but here we don't block and avoid + * disable[_delayed]_work_sync() + */ + disable_work(&sc->recv_io.posted.refill_work); + disable_work(&sc->mr_io.recovery_work); + disable_work(&sc->idle.immediate_work); + disable_delayed_work(&sc->idle.timer_work); + + if (sc->first_error == 0) + sc->first_error = -ECONNABORTED; + + switch (sc->status) { + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + case SMBDIRECT_SOCKET_ERROR: + case SMBDIRECT_SOCKET_DISCONNECTING: + case SMBDIRECT_SOCKET_DISCONNECTED: + case SMBDIRECT_SOCKET_DESTROYED: + /* + * Keep the current error status + */ + break; + + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; + break; + + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; + break; + + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; + break; + + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; + break; + + case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_CONNECTED: + sc->status = SMBDIRECT_SOCKET_ERROR; + break; + } + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. 
+ */ + smbd_disconnect_wake_up_all(sc); + + queue_work(sc->workqueue, &sc->disconnect_work); } /* Upcall from RDMA CM */ static int smbd_conn_upcall( struct rdma_cm_id *id, struct rdma_cm_event *event) { - struct smbd_connection *info = id->context; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = id->context; + struct smbdirect_socket_parameters *sp = &sc->parameters; const char *event_name = rdma_event_msg(event->event); + u8 peer_initiator_depth; + u8 peer_responder_resources; log_rdma_event(INFO, "event=%s status=%d\n", event_name, event->status); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED; + wake_up(&sc->status_wait); + break; + case RDMA_CM_EVENT_ROUTE_RESOLVED: - info->ri_rc = 0; - complete(&info->ri_done); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; + wake_up(&sc->status_wait); break; case RDMA_CM_EVENT_ADDR_ERROR: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - info->ri_rc = -EHOSTUNREACH; - complete(&info->ri_done); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; + smbd_disconnect_rdma_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_ROUTE_ERROR: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - info->ri_rc = -ENETUNREACH; - complete(&info->ri_done); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; + smbd_disconnect_rdma_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_ESTABLISHED: log_rdma_event(INFO, "connected event=%s\n", event_name); - sc->status = SMBDIRECT_SOCKET_CONNECTED; - wake_up_interruptible(&info->status_wait); + + /* + * Here we work around an inconsistency between + * iWarp and other devices (at least rxe and irdma using RoCEv2) + */ + if (rdma_protocol_iwarp(id->device, id->port_num)) { + /* + * iWarp devices report the peer's values + * with the perspective of the peer here. + * Tested with siw and irdma (in iwarp mode) + * We need to change to our perspective here, + * so we need to switch the values. + */ + peer_initiator_depth = event->param.conn.responder_resources; + peer_responder_resources = event->param.conn.initiator_depth; + } else { + /* + * Non iWarp devices report the peer's values + * already changed to our perspective here. + * Tested with rxe and irdma (in roce mode). + */ + peer_initiator_depth = event->param.conn.initiator_depth; + peer_responder_resources = event->param.conn.responder_resources; + } + if (rdma_protocol_iwarp(id->device, id->port_num) && + event->param.conn.private_data_len == 8) { + /* + * Legacy clients with only iWarp MPA v1 support + * need a private blob in order to negotiate + * the IRD/ORD values. + */ + const __be32 *ird_ord_hdr = event->param.conn.private_data; + u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); + u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); + + /* + * cifs.ko sends the legacy IRD/ORD negotiation + * event if iWarp MPA v2 was used. + * + * Here we check that the values match and only + * mark the client as legacy if they don't match. 
+ */ + if ((u32)event->param.conn.initiator_depth != ird32 || + (u32)event->param.conn.responder_resources != ord32) { + /* + * There are broken clients (old cifs.ko) + * using little endian and also + * struct rdma_conn_param only uses u8 + * for initiator_depth and responder_resources, + * so we truncate the value to U8_MAX. + * + * smb_direct_accept_client() will then + * do the real negotiation in order to + * select the minimum between client and + * server. + */ + ird32 = min_t(u32, ird32, U8_MAX); + ord32 = min_t(u32, ord32, U8_MAX); + + sc->rdma.legacy_iwarp = true; + peer_initiator_depth = (u8)ird32; + peer_responder_resources = (u8)ord32; + } + } + + /* + * negotiate the value by using the minimum + * between client and server if the client provided + * non 0 values. + */ + if (peer_initiator_depth != 0) + sp->initiator_depth = + min_t(u8, sp->initiator_depth, + peer_initiator_depth); + if (peer_responder_resources != 0) + sp->responder_resources = + min_t(u8, sp->responder_resources, + peer_responder_resources); + + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING); + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; + wake_up(&sc->status_wait); break; case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up_interruptible(&info->status_wait); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; + smbd_disconnect_rdma_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -221,15 +443,10 @@ static int smbd_conn_upcall( /* This happens when we fail the negotiation */ if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { log_rdma_event(ERR, "event=%s during negotiation\n", event_name); - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up(&info->status_wait); - break; } sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up_interruptible(&info->status_wait); - wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); - wake_up_interruptible_all(&info->wait_send_queue); + smbd_disconnect_rdma_work(&sc->disconnect_work); break; default: @@ -245,15 +462,15 @@ static int smbd_conn_upcall( static void smbd_qp_async_error_upcall(struct ib_event *event, void *context) { - struct smbd_connection *info = context; + struct smbdirect_socket *sc = context; - log_rdma_event(ERR, "%s on device %s info %p\n", - ib_event_msg(event->event), event->device->name, info); + log_rdma_event(ERR, "%s on device %s socket %p\n", + ib_event_msg(event->event), event->device->name, sc); switch (event->event) { case IB_EVENT_CQ_ERR: case IB_EVENT_QP_FATAL: - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); break; default: @@ -278,11 +495,9 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) struct smbdirect_send_io *request = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); struct smbdirect_socket *sc = request->socket; - struct smbd_connection *info = - container_of(sc, struct smbd_connection, socket); - log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%d\n", - request, wc->status); + log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n", + request, ib_wc_status_msg(wc->status)); for (i = 0; i < request->num_sge; i++) ib_dma_unmap_single(sc->ib.dev, @@ -291,17 +506,18 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) DMA_TO_DEVICE); if (wc->status != IB_WC_SUCCESS 
|| wc->opcode != IB_WC_SEND) { - log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", - wc->status, wc->opcode); + if (wc->status != IB_WC_WR_FLUSH_ERR) + log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->opcode); mempool_free(request, sc->send_io.mem.pool); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); return; } - if (atomic_dec_and_test(&info->send_pending)) - wake_up(&info->wait_send_pending); + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); - wake_up(&info->wait_post_send); + wake_up(&sc->send_io.pending.dec_wait_queue); mempool_free(request, sc->send_io.mem.pool); } @@ -325,8 +541,6 @@ static bool process_negotiation_response( struct smbdirect_recv_io *response, int packet_length) { struct smbdirect_socket *sc = response->socket; - struct smbd_connection *info = - container_of(sc, struct smbd_connection, socket); struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response); @@ -341,21 +555,19 @@ static bool process_negotiation_response( le16_to_cpu(packet->negotiated_version)); return false; } - info->protocol = le16_to_cpu(packet->negotiated_version); if (packet->credits_requested == 0) { log_rdma_event(ERR, "error: credits_requested==0\n"); return false; } - info->receive_credit_target = le16_to_cpu(packet->credits_requested); + sc->recv_io.credits.target = le16_to_cpu(packet->credits_requested); + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); if (packet->credits_granted == 0) { log_rdma_event(ERR, "error: credits_granted==0\n"); return false; } - atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted)); - - atomic_set(&info->receive_credits, 0); + atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted)); if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { log_rdma_event(ERR, "error: preferred_send_size=%d\n", @@ -380,16 +592,12 @@ static bool process_negotiation_response( } sp->max_fragmented_send_size = le32_to_cpu(packet->max_fragmented_size); - info->rdma_readwrite_threshold = - rdma_readwrite_threshold > sp->max_fragmented_send_size ? 
- sp->max_fragmented_send_size : - rdma_readwrite_threshold; sp->max_read_write_size = min_t(u32, le32_to_cpu(packet->max_readwrite_size), - info->max_frmr_depth * PAGE_SIZE); - info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; + sp->max_frmr_depth * PAGE_SIZE); + sp->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; return true; @@ -397,52 +605,40 @@ static bool process_negotiation_response( static void smbd_post_send_credits(struct work_struct *work) { - int ret = 0; int rc; struct smbdirect_recv_io *response; - struct smbd_connection *info = - container_of(work, struct smbd_connection, - post_send_credits_work); - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - wake_up(&info->wait_receive_queues); return; } - if (info->receive_credit_target > - atomic_read(&info->receive_credits)) { + if (sc->recv_io.credits.target > + atomic_read(&sc->recv_io.credits.count)) { while (true) { - response = get_receive_buffer(info); + response = get_receive_buffer(sc); if (!response) break; response->first_segment = false; - rc = smbd_post_recv(info, response); + rc = smbd_post_recv(sc, response); if (rc) { log_rdma_recv(ERR, "post_recv failed rc=%d\n", rc); - put_receive_buffer(info, response); + put_receive_buffer(sc, response); break; } - ret++; + atomic_inc(&sc->recv_io.posted.count); } } - spin_lock(&info->lock_new_credits_offered); - info->new_credits_offered += ret; - spin_unlock(&info->lock_new_credits_offered); - /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ - info->send_immediate = true; - if (atomic_read(&info->receive_credits) < - info->receive_credit_target - 1) { - if (info->keep_alive_requested == KEEP_ALIVE_PENDING || - info->send_immediate) { - log_keep_alive(INFO, "send an empty message\n"); - smbd_post_send_empty(info); - } + if (atomic_read(&sc->recv_io.credits.count) < + sc->recv_io.credits.target - 1) { + log_keep_alive(INFO, "schedule send of an empty message\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); } } @@ -454,19 +650,22 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); struct smbdirect_socket *sc = response->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbd_connection *info = - container_of(sc, struct smbd_connection, socket); + u16 old_recv_credit_target; u32 data_offset = 0; u32 data_length = 0; u32 remaining_data_length = 0; + bool negotiate_done = false; - log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", - response, sc->recv_io.expected, wc->status, wc->opcode, + log_rdma_recv(INFO, + "response=0x%p type=%d wc status=%s wc opcode %d byte_len=%d pkey_index=%u\n", + response, sc->recv_io.expected, + ib_wc_status_msg(wc->status), wc->opcode, wc->byte_len, wc->pkey_index); if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { - log_rdma_recv(INFO, "wc->status=%d opcode=%d\n", - wc->status, wc->opcode); + if (wc->status != IB_WC_WR_FLUSH_ERR) + log_rdma_recv(ERR, "wc->status=%s opcode=%d\n", + ib_wc_status_msg(wc->status), wc->opcode); goto error; } @@ -476,15 +675,31 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) response->sge.length, DMA_FROM_DEVICE); + /* + * Reset timer to the keepalive interval in + * order to trigger our next keepalive message. 
+ */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_interval_msec)); + switch (sc->recv_io.expected) { /* SMBD negotiation response */ case SMBDIRECT_EXPECT_NEGOTIATE_REP: dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); sc->recv_io.reassembly.full_packet_received = true; - info->negotiate_done = + negotiate_done = process_negotiation_response(response, wc->byte_len); - put_receive_buffer(info, response); - complete(&info->negotiate_completion); + put_receive_buffer(sc, response); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING); + if (!negotiate_done) { + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; + smbd_disconnect_rdma_connection(sc); + } else { + sc->status = SMBDIRECT_SOCKET_CONNECTED; + wake_up(&sc->status_wait); + } + return; /* SMBD data transfer packet */ @@ -517,17 +732,23 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) sc->recv_io.reassembly.full_packet_received = true; } - atomic_dec(&info->receive_credits); - info->receive_credit_target = + atomic_dec(&sc->recv_io.posted.count); + atomic_dec(&sc->recv_io.credits.count); + old_recv_credit_target = sc->recv_io.credits.target; + sc->recv_io.credits.target = le16_to_cpu(data_transfer->credits_requested); + sc->recv_io.credits.target = + min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); + sc->recv_io.credits.target = + max_t(u16, sc->recv_io.credits.target, 1); if (le16_to_cpu(data_transfer->credits_granted)) { atomic_add(le16_to_cpu(data_transfer->credits_granted), - &info->send_credits); + &sc->send_io.credits.count); /* * We have new send credits granted from remote peer * If any sender is waiting for credits, unblock it */ - wake_up_interruptible(&info->wait_send_queue); + wake_up(&sc->send_io.credits.wait_queue); } log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n", @@ -536,11 +757,11 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) le32_to_cpu(data_transfer->data_length), le32_to_cpu(data_transfer->remaining_data_length)); - /* Send a KEEP_ALIVE response right away if requested */ - info->keep_alive_requested = KEEP_ALIVE_NONE; + /* Send an immediate response right away if requested */ if (le16_to_cpu(data_transfer->flags) & SMBDIRECT_FLAG_RESPONSE_REQUESTED) { - info->keep_alive_requested = KEEP_ALIVE_PENDING; + log_keep_alive(INFO, "schedule send of immediate response\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); } /* @@ -548,10 +769,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) * reassembly queue and wake up the reading thread */ if (data_length) { - enqueue_reassembly(info, response, data_length); - wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); + if (sc->recv_io.credits.target > old_recv_credit_target) + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); + + enqueue_reassembly(sc, response, data_length); + wake_up(&sc->recv_io.reassembly.wait_queue); } else - put_receive_buffer(info, response); + put_receive_buffer(sc, response); return; @@ -566,19 +790,20 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); error: - put_receive_buffer(info, response); - smbd_disconnect_rdma_connection(info); + put_receive_buffer(sc, response); + smbd_disconnect_rdma_connection(sc); } static struct rdma_cm_id *smbd_create_id( - 
struct smbd_connection *info, + struct smbdirect_socket *sc, struct sockaddr *dstaddr, int port) { + struct smbdirect_socket_parameters *sp = &sc->parameters; struct rdma_cm_id *id; int rc; __be16 *sport; - id = rdma_create_id(&init_net, smbd_conn_upcall, info, + id = rdma_create_id(&init_net, smbd_conn_upcall, sc, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); @@ -593,43 +818,57 @@ static struct rdma_cm_id *smbd_create_id( *sport = htons(port); - init_completion(&info->ri_done); - info->ri_rc = -ETIMEDOUT; - + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING; rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr, - RDMA_RESOLVE_TIMEOUT); + sp->resolve_addr_timeout_msec); if (rc) { log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); goto out; } - rc = wait_for_completion_interruptible_timeout( - &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + rc = wait_event_interruptible_timeout( + sc->status_wait, + sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, + msecs_to_jiffies(sp->resolve_addr_timeout_msec)); /* e.g. if interrupted returns -ERESTARTSYS */ if (rc < 0) { log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); goto out; } - rc = info->ri_rc; - if (rc) { + if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING) { + rc = -ETIMEDOUT; + log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); + goto out; + } + if (sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED) { + rc = -EHOSTUNREACH; log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); goto out; } - info->ri_rc = -ETIMEDOUT; - rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING; + rc = rdma_resolve_route(id, sp->resolve_route_timeout_msec); if (rc) { log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); goto out; } - rc = wait_for_completion_interruptible_timeout( - &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + rc = wait_event_interruptible_timeout( + sc->status_wait, + sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING, + msecs_to_jiffies(sp->resolve_route_timeout_msec)); /* e.g. 
if interrupted returns -ERESTARTSYS */ if (rc < 0) { log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); goto out; } - rc = info->ri_rc; - if (rc) { + if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING) { + rc = -ETIMEDOUT; + log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); + goto out; + } + if (sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED) { + rc = -ENETUNREACH; log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); goto out; } @@ -656,13 +895,16 @@ static bool frwr_is_supported(struct ib_device_attr *attrs) } static int smbd_ia_open( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct sockaddr *dstaddr, int port) { - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int rc; - sc->rdma.cm_id = smbd_create_id(info, dstaddr, port); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED; + + sc->rdma.cm_id = smbd_create_id(sc, dstaddr, port); if (IS_ERR(sc->rdma.cm_id)) { rc = PTR_ERR(sc->rdma.cm_id); goto out1; @@ -677,19 +919,12 @@ static int smbd_ia_open( rc = -EPROTONOSUPPORT; goto out2; } - info->max_frmr_depth = min_t(int, - smbd_max_frmr_depth, + sp->max_frmr_depth = min_t(u32, + sp->max_frmr_depth, sc->ib.dev->attrs.max_fast_reg_page_list_len); - info->mr_type = IB_MR_TYPE_MEM_REG; + sc->mr_io.type = IB_MR_TYPE_MEM_REG; if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) - info->mr_type = IB_MR_TYPE_SG_GAPS; - - sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); - if (IS_ERR(sc->ib.pd)) { - rc = PTR_ERR(sc->ib.pd); - log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); - goto out2; - } + sc->mr_io.type = IB_MR_TYPE_SG_GAPS; return 0; @@ -707,9 +942,8 @@ out1: * After negotiation, the transport is connected and ready for * carrying upper layer SMB payload */ -static int smbd_post_send_negotiate_req(struct smbd_connection *info) +static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc = -ENOMEM; @@ -761,18 +995,18 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->sge[0].addr, request->sge[0].length, request->sge[0].lkey); - atomic_inc(&info->send_pending); + atomic_inc(&sc->send_io.pending.count); rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (!rc) return 0; /* if we reach here, post send failed */ log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); - atomic_dec(&info->send_pending); + atomic_dec(&sc->send_io.pending.count); ib_dma_unmap_single(sc->ib.dev, request->sge[0].addr, request->sge[0].length, DMA_TO_DEVICE); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); dma_mapping_failed: mempool_free(request, sc->send_io.mem.pool); @@ -787,14 +1021,20 @@ dma_mapping_failed: * buffer as possible, and extend the receive credits to remote peer * return value: the new credtis being granted. 
*/ -static int manage_credits_prior_sending(struct smbd_connection *info) +static int manage_credits_prior_sending(struct smbdirect_socket *sc) { int new_credits; - spin_lock(&info->lock_new_credits_offered); - new_credits = info->new_credits_offered; - info->new_credits_offered = 0; - spin_unlock(&info->lock_new_credits_offered); + if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) + return 0; + + new_credits = atomic_read(&sc->recv_io.posted.count); + if (new_credits == 0) + return 0; + + new_credits -= atomic_read(&sc->recv_io.credits.count); + if (new_credits <= 0) + return 0; return new_credits; } @@ -808,21 +1048,27 @@ static int manage_credits_prior_sending(struct smbd_connection *info) * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set * 0: otherwise */ -static int manage_keep_alive_before_sending(struct smbd_connection *info) +static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) { - if (info->keep_alive_requested == KEEP_ALIVE_PENDING) { - info->keep_alive_requested = KEEP_ALIVE_SENT; + struct smbdirect_socket_parameters *sp = &sc->parameters; + + if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; + /* + * Now use the keepalive timeout (instead of keepalive interval) + * in order to wait for a response + */ + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_timeout_msec)); return 1; } return 0; } /* Post the send request */ -static int smbd_post_send(struct smbd_connection *info, +static int smbd_post_send(struct smbdirect_socket *sc, struct smbdirect_send_io *request) { - struct smbdirect_socket *sc = &info->socket; - struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc, i; @@ -849,21 +1095,17 @@ static int smbd_post_send(struct smbd_connection *info, rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (rc) { log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); rc = -EAGAIN; - } else - /* Reset timer for idle connection after packet is sent */ - mod_delayed_work(info->workqueue, &info->idle_timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); + } return rc; } -static int smbd_post_send_iter(struct smbd_connection *info, +static int smbd_post_send_iter(struct smbdirect_socket *sc, struct iov_iter *iter, int *_remaining_data_length) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; int i, rc; int header_length; @@ -874,8 +1116,8 @@ static int smbd_post_send_iter(struct smbd_connection *info, wait_credit: /* Wait for send credits. 
A SMBD packet needs one credit */ - rc = wait_event_interruptible(info->wait_send_queue, - atomic_read(&info->send_credits) > 0 || + rc = wait_event_interruptible(sc->send_io.credits.wait_queue, + atomic_read(&sc->send_io.credits.count) > 0 || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) goto err_wait_credit; @@ -885,14 +1127,14 @@ wait_credit: rc = -EAGAIN; goto err_wait_credit; } - if (unlikely(atomic_dec_return(&info->send_credits) < 0)) { - atomic_inc(&info->send_credits); + if (unlikely(atomic_dec_return(&sc->send_io.credits.count) < 0)) { + atomic_inc(&sc->send_io.credits.count); goto wait_credit; } wait_send_queue: - wait_event(info->wait_post_send, - atomic_read(&info->send_pending) < sp->send_credit_target || + wait_event(sc->send_io.pending.dec_wait_queue, + atomic_read(&sc->send_io.pending.count) < sp->send_credit_target || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { @@ -901,9 +1143,9 @@ wait_send_queue: goto err_wait_send_queue; } - if (unlikely(atomic_inc_return(&info->send_pending) > + if (unlikely(atomic_inc_return(&sc->send_io.pending.count) > sp->send_credit_target)) { - atomic_dec(&info->send_pending); + atomic_dec(&sc->send_io.pending.count); goto wait_send_queue; } @@ -916,10 +1158,30 @@ wait_send_queue: request->socket = sc; memset(request->sge, 0, sizeof(request->sge)); + /* Map the packet to DMA */ + header_length = sizeof(struct smbdirect_data_transfer); + /* If this is a packet without payload, don't send padding */ + if (!iter) + header_length = offsetof(struct smbdirect_data_transfer, padding); + + packet = smbdirect_send_io_payload(request); + request->sge[0].addr = ib_dma_map_single(sc->ib.dev, + (void *)packet, + header_length, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { + rc = -EIO; + goto err_dma; + } + + request->sge[0].length = header_length; + request->sge[0].lkey = sc->ib.pd->local_dma_lkey; + request->num_sge = 1; + /* Fill in the data payload to find out how much data we can add */ if (iter) { struct smb_extract_to_rdma extract = { - .nr_sge = 1, + .nr_sge = request->num_sge, .max_sge = SMBDIRECT_SEND_IO_MAX_SGE, .sge = request->sge, .device = sc->ib.dev, @@ -938,21 +1200,17 @@ wait_send_queue: *_remaining_data_length -= data_length; } else { data_length = 0; - request->num_sge = 1; } /* Fill in the packet header */ - packet = smbdirect_send_io_payload(request); packet->credits_requested = cpu_to_le16(sp->send_credit_target); - new_credits = manage_credits_prior_sending(info); - atomic_add(new_credits, &info->receive_credits); + new_credits = manage_credits_prior_sending(sc); + atomic_add(new_credits, &sc->recv_io.credits.count); packet->credits_granted = cpu_to_le16(new_credits); - info->send_immediate = false; - packet->flags = 0; - if (manage_keep_alive_before_sending(info)) + if (manage_keep_alive_before_sending(sc)) packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); packet->reserved = 0; @@ -971,26 +1229,7 @@ wait_send_queue: le32_to_cpu(packet->data_length), le32_to_cpu(packet->remaining_data_length)); - /* Map the packet to DMA */ - header_length = sizeof(struct smbdirect_data_transfer); - /* If this is a packet without payload, don't send padding */ - if (!data_length) - header_length = offsetof(struct smbdirect_data_transfer, padding); - - request->sge[0].addr = ib_dma_map_single(sc->ib.dev, - (void *)packet, - header_length, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { - rc = -EIO; - 
request->sge[0].addr = 0; - goto err_dma; - } - - request->sge[0].length = header_length; - request->sge[0].lkey = sc->ib.pd->local_dma_lkey; - - rc = smbd_post_send(info, request); + rc = smbd_post_send(sc, request); if (!rc) return 0; @@ -1003,19 +1242,16 @@ err_dma: DMA_TO_DEVICE); mempool_free(request, sc->send_io.mem.pool); - /* roll back receive credits and credits to be offered */ - spin_lock(&info->lock_new_credits_offered); - info->new_credits_offered += new_credits; - spin_unlock(&info->lock_new_credits_offered); - atomic_sub(new_credits, &info->receive_credits); + /* roll back the granted receive credits */ + atomic_sub(new_credits, &sc->recv_io.credits.count); err_alloc: - if (atomic_dec_and_test(&info->send_pending)) - wake_up(&info->wait_send_pending); + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); err_wait_send_queue: /* roll back send credits and pending */ - atomic_inc(&info->send_credits); + atomic_inc(&sc->send_io.credits.count); err_wait_credit: return rc; @@ -1026,15 +1262,15 @@ err_wait_credit: * Empty message is used to extend credits to peer to for keep live * while there is no upper layer payload to send at the time */ -static int smbd_post_send_empty(struct smbd_connection *info) +static int smbd_post_send_empty(struct smbdirect_socket *sc) { int remaining_data_length = 0; - info->count_send_empty++; - return smbd_post_send_iter(info, NULL, &remaining_data_length); + sc->statistics.send_empty++; + return smbd_post_send_iter(sc, NULL, &remaining_data_length); } -static int smbd_post_send_full_iter(struct smbd_connection *info, +static int smbd_post_send_full_iter(struct smbdirect_socket *sc, struct iov_iter *iter, int *_remaining_data_length) { @@ -1047,7 +1283,7 @@ static int smbd_post_send_full_iter(struct smbd_connection *info, */ while (iov_iter_count(iter) > 0) { - rc = smbd_post_send_iter(info, iter, _remaining_data_length); + rc = smbd_post_send_iter(sc, iter, _remaining_data_length); if (rc < 0) break; } @@ -1061,9 +1297,8 @@ static int smbd_post_send_full_iter(struct smbd_connection *info, * The interaction is controlled by send/receive credit system */ static int smbd_post_recv( - struct smbd_connection *info, struct smbdirect_recv_io *response) + struct smbdirect_socket *sc, struct smbdirect_recv_io *response) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_recv_wr recv_wr; int rc = -EIO; @@ -1089,7 +1324,7 @@ static int smbd_post_recv( ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); response->sge.length = 0; - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); } @@ -1097,33 +1332,36 @@ static int smbd_post_recv( } /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ -static int smbd_negotiate(struct smbd_connection *info) +static int smbd_negotiate(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int rc; - struct smbdirect_recv_io *response = get_receive_buffer(info); + struct smbdirect_recv_io *response = get_receive_buffer(sc); + + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED); + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; - rc = smbd_post_recv(info, response); + rc = smbd_post_recv(sc, response); log_rdma_event(INFO, "smbd_post_recv 
rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", rc, response->sge.addr, response->sge.length, response->sge.lkey); if (rc) { - put_receive_buffer(info, response); + put_receive_buffer(sc, response); return rc; } - init_completion(&info->negotiate_completion); - info->negotiate_done = false; - rc = smbd_post_send_negotiate_req(info); + rc = smbd_post_send_negotiate_req(sc); if (rc) return rc; - rc = wait_for_completion_interruptible_timeout( - &info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ); - log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc); + rc = wait_event_interruptible_timeout( + sc->status_wait, + sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING, + msecs_to_jiffies(sp->negotiate_timeout_msec)); + log_rdma_event(INFO, "wait_event_interruptible_timeout rc=%d\n", rc); - if (info->negotiate_done) + if (sc->status == SMBDIRECT_SOCKET_CONNECTED) return 0; if (rc == 0) @@ -1147,13 +1385,13 @@ static int smbd_negotiate(struct smbd_connection *info) * data_length: the size of payload in this packet */ static void enqueue_reassembly( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct smbdirect_recv_io *response, int data_length) { - struct smbdirect_socket *sc = &info->socket; + unsigned long flags; - spin_lock(&sc->recv_io.reassembly.lock); + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); list_add_tail(&response->list, &sc->recv_io.reassembly.list); sc->recv_io.reassembly.queue_length++; /* @@ -1164,9 +1402,8 @@ static void enqueue_reassembly( */ virt_wmb(); sc->recv_io.reassembly.data_length += data_length; - spin_unlock(&sc->recv_io.reassembly.lock); - info->count_reassembly_queue++; - info->count_enqueue_reassembly_queue++; + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + sc->statistics.enqueue_reassembly_queue++; } /* @@ -1174,9 +1411,8 @@ static void enqueue_reassembly( * Caller is responsible for locking * return value: the first entry if any, NULL if queue is empty */ -static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info) +static struct smbdirect_recv_io *_get_first_reassembly(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; if (!list_empty(&sc->recv_io.reassembly.list)) { @@ -1193,9 +1429,8 @@ static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *i * pre-allocated in advance. * return value: the receive buffer, NULL if none is available */ -static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info) +static struct smbdirect_recv_io *get_receive_buffer(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; unsigned long flags; @@ -1205,8 +1440,7 @@ static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info &sc->recv_io.free.list, struct smbdirect_recv_io, list); list_del(&ret->list); - info->count_receive_queue--; - info->count_get_receive_buffer++; + sc->statistics.get_receive_buffer++; } spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); @@ -1220,9 +1454,8 @@ static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info * receive buffer is returned. 
*/ static void put_receive_buffer( - struct smbd_connection *info, struct smbdirect_recv_io *response) + struct smbdirect_socket *sc, struct smbdirect_recv_io *response) { - struct smbdirect_socket *sc = &info->socket; unsigned long flags; if (likely(response->sge.length != 0)) { @@ -1235,31 +1468,18 @@ static void put_receive_buffer( spin_lock_irqsave(&sc->recv_io.free.lock, flags); list_add_tail(&response->list, &sc->recv_io.free.list); - info->count_receive_queue++; - info->count_put_receive_buffer++; + sc->statistics.put_receive_buffer++; spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); - queue_work(info->workqueue, &info->post_send_credits_work); + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); } /* Preallocate all receive buffer on transport establishment */ -static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) +static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *response; int i; - INIT_LIST_HEAD(&sc->recv_io.reassembly.list); - spin_lock_init(&sc->recv_io.reassembly.lock); - sc->recv_io.reassembly.data_length = 0; - sc->recv_io.reassembly.queue_length = 0; - - INIT_LIST_HEAD(&sc->recv_io.free.list); - spin_lock_init(&sc->recv_io.free.lock); - info->count_receive_queue = 0; - - init_waitqueue_head(&info->wait_receive_queues); - for (i = 0; i < num_buf; i++) { response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL); if (!response) @@ -1268,7 +1488,6 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) response->socket = sc; response->sge.length = 0; list_add_tail(&response->list, &sc->recv_io.free.list); - info->count_receive_queue++; } return 0; @@ -1279,45 +1498,59 @@ allocate_failed: &sc->recv_io.free.list, struct smbdirect_recv_io, list); list_del(&response->list); - info->count_receive_queue--; mempool_free(response, sc->recv_io.mem.pool); } return -ENOMEM; } -static void destroy_receive_buffers(struct smbd_connection *info) +static void destroy_receive_buffers(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *response; - while ((response = get_receive_buffer(info))) + while ((response = get_receive_buffer(sc))) mempool_free(response, sc->recv_io.mem.pool); } +static void send_immediate_empty_message(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, idle.immediate_work); + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + return; + + log_keep_alive(INFO, "send an empty message\n"); + smbd_post_send_empty(sc); +} + /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ static void idle_connection_timer(struct work_struct *work) { - struct smbd_connection *info = container_of( - work, struct smbd_connection, - idle_timer_work.work); - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, idle.timer_work.work); struct smbdirect_socket_parameters *sp = &sc->parameters; - if (info->keep_alive_requested != KEEP_ALIVE_NONE) { + if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { log_keep_alive(ERR, - "error status info->keep_alive_requested=%d\n", - info->keep_alive_requested); - smbd_disconnect_rdma_connection(info); + "error status sc->idle.keepalive=%d\n", + sc->idle.keepalive); + smbd_disconnect_rdma_connection(sc); return; } - log_keep_alive(INFO, "about to send an empty idle message\n"); - smbd_post_send_empty(info); + if 
(sc->status != SMBDIRECT_SOCKET_CONNECTED) + return; - /* Setup the next idle timeout work */ - queue_delayed_work(info->workqueue, &info->idle_timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); + /* + * Now use the keepalive timeout (instead of keepalive interval) + * in order to wait for a response + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_timeout_msec)); + log_keep_alive(INFO, "schedule send of empty idle message\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); } /* @@ -1329,7 +1562,6 @@ void smbd_destroy(struct TCP_Server_Info *server) { struct smbd_connection *info = server->smbd_conn; struct smbdirect_socket *sc; - struct smbdirect_socket_parameters *sp; struct smbdirect_recv_io *response; unsigned long flags; @@ -1338,19 +1570,30 @@ void smbd_destroy(struct TCP_Server_Info *server) return; } sc = &info->socket; - sp = &sc->parameters; + + log_rdma_event(INFO, "cancelling and disable disconnect_work\n"); + disable_work_sync(&sc->disconnect_work); log_rdma_event(INFO, "destroying rdma session\n"); - if (sc->status != SMBDIRECT_SOCKET_DISCONNECTED) { - rdma_disconnect(sc->rdma.cm_id); + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + smbd_disconnect_rdma_work(&sc->disconnect_work); log_rdma_event(INFO, "wait for transport being disconnected\n"); wait_event_interruptible( - info->status_wait, + sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); } - log_rdma_event(INFO, "cancelling post_send_credits_work\n"); - disable_work_sync(&info->post_send_credits_work); + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + * + * Most likely this was already called via + * smbd_disconnect_rdma_work(), but call it again... + */ + smbd_disconnect_wake_up_all(sc); + + log_rdma_event(INFO, "cancelling recv_io.posted.refill_work\n"); + disable_work_sync(&sc->recv_io.posted.refill_work); log_rdma_event(INFO, "destroying qp\n"); ib_drain_qp(sc->ib.qp); @@ -1358,18 +1601,20 @@ void smbd_destroy(struct TCP_Server_Info *server) sc->ib.qp = NULL; log_rdma_event(INFO, "cancelling idle timer\n"); - disable_delayed_work_sync(&info->idle_timer_work); + disable_delayed_work_sync(&sc->idle.timer_work); + log_rdma_event(INFO, "cancelling send immediate work\n"); + disable_work_sync(&sc->idle.immediate_work); /* It's not possible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - response = _get_first_reassembly(info); + response = _get_first_reassembly(sc); if (response) { list_del(&response->list); spin_unlock_irqrestore( &sc->recv_io.reassembly.lock, flags); - put_receive_buffer(info, response); + put_receive_buffer(sc, response); } else spin_unlock_irqrestore( &sc->recv_io.reassembly.lock, flags); @@ -1377,9 +1622,7 @@ void smbd_destroy(struct TCP_Server_Info *server) sc->recv_io.reassembly.data_length = 0; log_rdma_event(INFO, "free receive buffers\n"); - wait_event(info->wait_receive_queues, - info->count_receive_queue == sp->recv_credit_max); - destroy_receive_buffers(info); + destroy_receive_buffers(sc); /* * For performance reasons, memory registration and deregistration @@ -1389,13 +1632,12 @@ void smbd_destroy(struct TCP_Server_Info *server) * path when sending data, and then release memory registrations. 
*/ log_rdma_event(INFO, "freeing mr list\n"); - wake_up_interruptible_all(&info->wait_mr); - while (atomic_read(&info->mr_used_count)) { + while (atomic_read(&sc->mr_io.used.count)) { cifs_server_unlock(server); msleep(1000); cifs_server_lock(server); } - destroy_mr_list(info); + destroy_mr_list(sc); ib_free_cq(sc->ib.send_cq); ib_free_cq(sc->ib.recv_cq); @@ -1411,7 +1653,7 @@ void smbd_destroy(struct TCP_Server_Info *server) sc->status = SMBDIRECT_SOCKET_DESTROYED; - destroy_workqueue(info->workqueue); + destroy_workqueue(sc->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); kfree(info); server->smbd_conn = NULL; @@ -1453,12 +1695,9 @@ create_conn: return -ENOENT; } -static void destroy_caches_and_workqueue(struct smbd_connection *info) +static void destroy_caches(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; - - destroy_receive_buffers(info); - destroy_workqueue(info->workqueue); + destroy_receive_buffers(sc); mempool_destroy(sc->recv_io.mem.pool); kmem_cache_destroy(sc->recv_io.mem.cache); mempool_destroy(sc->send_io.mem.pool); @@ -1466,9 +1705,8 @@ static void destroy_caches_and_workqueue(struct smbd_connection *info) } #define MAX_NAME_LEN 80 -static int allocate_caches_and_workqueue(struct smbd_connection *info) +static int allocate_caches(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; char name[MAX_NAME_LEN]; int rc; @@ -1476,7 +1714,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) return -ENOMEM; - scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", info); + scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", sc); sc->send_io.mem.cache = kmem_cache_create( name, @@ -1492,7 +1730,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!sc->send_io.mem.pool) goto out1; - scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", info); + scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", sc); struct kmem_cache_args response_args = { .align = __alignof__(struct smbdirect_recv_io), @@ -1513,21 +1751,14 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!sc->recv_io.mem.pool) goto out3; - scnprintf(name, MAX_NAME_LEN, "smbd_%p", info); - info->workqueue = create_workqueue(name); - if (!info->workqueue) - goto out4; - - rc = allocate_receive_buffers(info, sp->recv_credit_max); + rc = allocate_receive_buffers(sc, sp->recv_credit_max); if (rc) { log_rdma_event(ERR, "failed to allocate receive buffers\n"); - goto out5; + goto out4; } return 0; -out5: - destroy_workqueue(info->workqueue); out4: mempool_destroy(sc->recv_io.mem.pool); out3: @@ -1551,46 +1782,63 @@ static struct smbd_connection *_smbd_get_connection( struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; struct ib_port_immutable port_immutable; - u32 ird_ord_hdr[2]; + __be32 ird_ord_hdr[2]; + char wq_name[80]; + struct workqueue_struct *workqueue; info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); if (!info) return NULL; sc = &info->socket; + scnprintf(wq_name, ARRAY_SIZE(wq_name), "smbd_%p", sc); + workqueue = create_workqueue(wq_name); + if (!workqueue) + goto create_wq_failed; + smbdirect_socket_init(sc); + sc->workqueue = workqueue; sp = &sc->parameters; - sc->status = SMBDIRECT_SOCKET_CONNECTING; - rc = smbd_ia_open(info, dstaddr, port); + INIT_WORK(&sc->disconnect_work, 
smbd_disconnect_rdma_work); + + sp->resolve_addr_timeout_msec = RDMA_RESOLVE_TIMEOUT; + sp->resolve_route_timeout_msec = RDMA_RESOLVE_TIMEOUT; + sp->rdma_connect_timeout_msec = RDMA_RESOLVE_TIMEOUT; + sp->negotiate_timeout_msec = SMBD_NEGOTIATE_TIMEOUT * 1000; + sp->initiator_depth = 1; + sp->responder_resources = SMBD_CM_RESPONDER_RESOURCES; + sp->recv_credit_max = smbd_receive_credit_max; + sp->send_credit_target = smbd_send_credit_target; + sp->max_send_size = smbd_max_send_size; + sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; + sp->max_recv_size = smbd_max_receive_size; + sp->max_frmr_depth = smbd_max_frmr_depth; + sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; + sp->keepalive_timeout_msec = KEEPALIVE_RECV_TIMEOUT * 1000; + + rc = smbd_ia_open(sc, dstaddr, port); if (rc) { log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); goto create_id_failed; } - if (smbd_send_credit_target > sc->ib.dev->attrs.max_cqe || - smbd_send_credit_target > sc->ib.dev->attrs.max_qp_wr) { + if (sp->send_credit_target > sc->ib.dev->attrs.max_cqe || + sp->send_credit_target > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - smbd_send_credit_target, + sp->send_credit_target, sc->ib.dev->attrs.max_cqe, sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - if (smbd_receive_credit_max > sc->ib.dev->attrs.max_cqe || - smbd_receive_credit_max > sc->ib.dev->attrs.max_qp_wr) { + if (sp->recv_credit_max > sc->ib.dev->attrs.max_cqe || + sp->recv_credit_max > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - smbd_receive_credit_max, + sp->recv_credit_max, sc->ib.dev->attrs.max_cqe, sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - sp->recv_credit_max = smbd_receive_credit_max; - sp->send_credit_target = smbd_send_credit_target; - sp->max_send_size = smbd_max_send_size; - sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; - sp->max_recv_size = smbd_max_receive_size; - sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; - if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE || sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { log_rdma_event(ERR, @@ -1602,8 +1850,16 @@ static struct smbd_connection *_smbd_get_connection( goto config_failed; } + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); + if (IS_ERR(sc->ib.pd)) { + rc = PTR_ERR(sc->ib.pd); + sc->ib.pd = NULL; + log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); + goto alloc_pd_failed; + } + sc->ib.send_cq = - ib_alloc_cq_any(sc->ib.dev, info, + ib_alloc_cq_any(sc->ib.dev, sc, sp->send_credit_target, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.send_cq)) { sc->ib.send_cq = NULL; @@ -1611,7 +1867,7 @@ static struct smbd_connection *_smbd_get_connection( } sc->ib.recv_cq = - ib_alloc_cq_any(sc->ib.dev, info, + ib_alloc_cq_any(sc->ib.dev, sc, sp->recv_credit_max, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.recv_cq)) { sc->ib.recv_cq = NULL; @@ -1620,7 +1876,7 @@ static struct smbd_connection *_smbd_get_connection( memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smbd_qp_async_error_upcall; - qp_attr.qp_context = info; + qp_attr.qp_context = sc; qp_attr.cap.max_send_wr = sp->send_credit_target; qp_attr.cap.max_recv_wr = sp->recv_credit_max; qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; @@ -1639,22 +1895,22 @@ static struct smbd_connection *_smbd_get_connection( } 
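Both credit checks above enforce the same device limits; condensed into a hypothetical helper for clarity:

```c
/*
 * A CQ cannot hold more entries than the device's max_cqe, and a QP
 * cannot have more outstanding work requests than max_qp_wr, so every
 * configured credit value has to fit under both limits.
 */
static bool credits_fit_device(struct ib_device *dev, int credits)
{
	return credits <= dev->attrs.max_cqe &&
	       credits <= dev->attrs.max_qp_wr;
}
```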
sc->ib.qp = sc->rdma.cm_id->qp; - memset(&conn_param, 0, sizeof(conn_param)); - conn_param.initiator_depth = 0; - - conn_param.responder_resources = - min(sc->ib.dev->attrs.max_qp_rd_atom, - SMBD_CM_RESPONDER_RESOURCES); - info->responder_resources = conn_param.responder_resources; + sp->responder_resources = + min_t(u8, sp->responder_resources, + sc->ib.dev->attrs.max_qp_rd_atom); log_rdma_mr(INFO, "responder_resources=%d\n", - info->responder_resources); + sp->responder_resources); + + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.initiator_depth = sp->initiator_depth; + conn_param.responder_resources = sp->responder_resources; /* Need to send IRD/ORD in private data for iWARP */ sc->ib.dev->ops.get_port_immutable( sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { - ird_ord_hdr[0] = info->responder_resources; - ird_ord_hdr[1] = 1; + ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); + ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); conn_param.private_data = ird_ord_hdr; conn_param.private_data_len = sizeof(ird_ord_hdr); } else { @@ -1669,8 +1925,8 @@ static struct smbd_connection *_smbd_get_connection( log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", &addr_in->sin_addr, port); - init_waitqueue_head(&info->status_wait); - init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); @@ -1678,45 +1934,42 @@ static struct smbd_connection *_smbd_get_connection( } wait_event_interruptible_timeout( - info->status_wait, - sc->status != SMBDIRECT_SOCKET_CONNECTING, - msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + sc->status_wait, + sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING, + msecs_to_jiffies(sp->rdma_connect_timeout_msec)); - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) { log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); goto rdma_connect_failed; } log_rdma_event(INFO, "rdma_connect connected\n"); - rc = allocate_caches_and_workqueue(info); + rc = allocate_caches(sc); if (rc) { log_rdma_event(ERR, "cache allocation failed\n"); goto allocate_cache_failed; } - init_waitqueue_head(&info->wait_send_queue); - INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); - queue_delayed_work(info->workqueue, &info->idle_timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); - - init_waitqueue_head(&info->wait_send_pending); - atomic_set(&info->send_pending, 0); - - init_waitqueue_head(&info->wait_post_send); + INIT_WORK(&sc->idle.immediate_work, send_immediate_empty_message); + INIT_DELAYED_WORK(&sc->idle.timer_work, idle_connection_timer); + /* + * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING + * so that the timer will cause a disconnect. 
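The hunk above also fixes a wire-format bug: iWARP has no in-band negotiation of RDMA read credits, so IRD/ORD travel as two 32-bit words in the MPA private data, which is big-endian on the wire, while the old code sent them in host byte order. A sketch of how a peer would decode the same private data (receive side is illustrative, not part of this series):

```c
static void decode_ird_ord(const void *private_data, u8 *ird, u8 *ord)
{
	const __be32 *hdr = private_data;

	/* clamp to the u8 range used by struct rdma_conn_param */
	*ird = min_t(u32, be32_to_cpu(hdr[0]), 255);
	*ord = min_t(u32, be32_to_cpu(hdr[1]), 255);
}
```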
+ */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->negotiate_timeout_msec)); - INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); - INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); - info->new_credits_offered = 0; - spin_lock_init(&info->lock_new_credits_offered); + INIT_WORK(&sc->recv_io.posted.refill_work, smbd_post_send_credits); - rc = smbd_negotiate(info); + rc = smbd_negotiate(sc); if (rc) { log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); goto negotiation_failed; } - rc = allocate_mr_list(info); + rc = allocate_mr_list(sc); if (rc) { log_rdma_mr(ERR, "memory registration allocation failed\n"); goto allocate_mr_failed; @@ -1731,11 +1984,11 @@ allocate_mr_failed: return NULL; negotiation_failed: - disable_delayed_work_sync(&info->idle_timer_work); - destroy_caches_and_workqueue(info); + disable_delayed_work_sync(&sc->idle.timer_work); + destroy_caches(sc); sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; rdma_disconnect(sc->rdma.cm_id); - wait_event(info->status_wait, + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); allocate_cache_failed: @@ -1749,11 +2002,15 @@ alloc_cq_failed: if (sc->ib.recv_cq) ib_free_cq(sc->ib.recv_cq); -config_failed: ib_dealloc_pd(sc->ib.pd); + +alloc_pd_failed: +config_failed: rdma_destroy_id(sc->rdma.cm_id); create_id_failed: + destroy_workqueue(sc->workqueue); +create_wq_failed: kfree(info); return NULL; } @@ -1762,6 +2019,7 @@ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr) { struct smbd_connection *ret; + const struct smbdirect_socket_parameters *sp; int port = SMBD_PORT; try_again: @@ -1772,6 +2030,16 @@ try_again: port = SMB_PORT; goto try_again; } + if (!ret) + return NULL; + + sp = &ret->socket.parameters; + + server->rdma_readwrite_threshold = + rdma_readwrite_threshold > sp->max_fragmented_send_size ? 
+ sp->max_fragmented_send_size : + rdma_readwrite_threshold; + return ret; } @@ -1813,6 +2081,7 @@ again: if (sc->recv_io.reassembly.data_length >= size) { int queue_length; int queue_removed = 0; + unsigned long flags; /* * Need to make sure reassembly_data_length is read before @@ -1827,7 +2096,7 @@ again: to_read = size; offset = sc->recv_io.reassembly.first_entry_offset; while (data_read < size) { - response = _get_first_reassembly(info); + response = _get_first_reassembly(sc); data_transfer = smbdirect_recv_io_payload(response); data_length = le32_to_cpu(data_transfer->data_length); remaining_data_length = @@ -1872,16 +2141,15 @@ again: if (queue_length) list_del(&response->list); else { - spin_lock_irq( - &sc->recv_io.reassembly.lock); + spin_lock_irqsave( + &sc->recv_io.reassembly.lock, flags); list_del(&response->list); - spin_unlock_irq( - &sc->recv_io.reassembly.lock); + spin_unlock_irqrestore( + &sc->recv_io.reassembly.lock, flags); } queue_removed++; - info->count_reassembly_queue--; - info->count_dequeue_reassembly_queue++; - put_receive_buffer(info, response); + sc->statistics.dequeue_reassembly_queue++; + put_receive_buffer(sc, response); offset = 0; log_read(INFO, "put_receive_buffer offset=0\n"); } else @@ -1895,10 +2163,10 @@ again: to_read, data_read, offset); } - spin_lock_irq(&sc->recv_io.reassembly.lock); + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); sc->recv_io.reassembly.data_length -= data_read; sc->recv_io.reassembly.queue_length -= queue_removed; - spin_unlock_irq(&sc->recv_io.reassembly.lock); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); sc->recv_io.reassembly.first_entry_offset = offset; log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", @@ -1983,13 +2251,13 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length); + rc = smbd_post_send_full_iter(sc, &iter, &remaining_data_length); if (rc < 0) break; if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ - rc = smbd_post_send_full_iter(info, &rqst->rq_iter, + rc = smbd_post_send_full_iter(sc, &rqst->rq_iter, &remaining_data_length); if (rc < 0) break; @@ -2004,8 +2272,8 @@ int smbd_send(struct TCP_Server_Info *server, * that means all the I/Os have been out and we are good to return */ - wait_event(info->wait_send_pending, - atomic_read(&info->send_pending) == 0 || + wait_event(sc->send_io.pending.zero_wait_queue, + atomic_read(&sc->send_io.pending.count) == 0 || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) @@ -2016,14 +2284,13 @@ int smbd_send(struct TCP_Server_Info *server, static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbd_mr *mr; - struct ib_cqe *cqe; + struct smbdirect_mr_io *mr = + container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); + struct smbdirect_socket *sc = mr->socket; if (wc->status) { log_rdma_mr(ERR, "status=%d\n", wc->status); - cqe = wc->wr_cqe; - mr = container_of(cqe, struct smbd_mr, cqe); - smbd_disconnect_rdma_connection(mr->conn); + smbd_disconnect_rdma_connection(sc); } } @@ -2038,14 +2305,14 @@ static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) */ static void smbd_mr_recovery_work(struct work_struct *work) { - struct smbd_connection *info = - container_of(work, struct smbd_connection, mr_recovery_work); - struct 
smbdirect_socket *sc = &info->socket; - struct smbd_mr *smbdirect_mr; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, mr_io.recovery_work); + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_mr_io *smbdirect_mr; int rc; - list_for_each_entry(smbdirect_mr, &info->mr_list, list) { - if (smbdirect_mr->state == MR_ERROR) { + list_for_each_entry(smbdirect_mr, &sc->mr_io.all.list, list) { + if (smbdirect_mr->state == SMBDIRECT_MR_ERROR) { /* recover this MR entry */ rc = ib_dereg_mr(smbdirect_mr->mr); @@ -2053,25 +2320,25 @@ static void smbd_mr_recovery_work(struct work_struct *work) log_rdma_mr(ERR, "ib_dereg_mr failed rc=%x\n", rc); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); continue; } smbdirect_mr->mr = ib_alloc_mr( - sc->ib.pd, info->mr_type, - info->max_frmr_depth); + sc->ib.pd, sc->mr_io.type, + sp->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", - info->mr_type, - info->max_frmr_depth); - smbd_disconnect_rdma_connection(info); + sc->mr_io.type, + sp->max_frmr_depth); + smbd_disconnect_rdma_connection(sc); continue; } } else /* This MR is being used, don't recover it */ continue; - smbdirect_mr->state = MR_READY; + smbdirect_mr->state = SMBDIRECT_MR_READY; /* smbdirect_mr->state is updated by this function * and is read and updated by I/O issuing CPUs trying @@ -2080,19 +2347,18 @@ static void smbd_mr_recovery_work(struct work_struct *work) * value is updated before waking up any calls to * get_mr() from the I/O issuing CPUs */ - if (atomic_inc_return(&info->mr_ready_count) == 1) - wake_up_interruptible(&info->wait_mr); + if (atomic_inc_return(&sc->mr_io.ready.count) == 1) + wake_up(&sc->mr_io.ready.wait_queue); } } -static void destroy_mr_list(struct smbd_connection *info) +static void destroy_mr_list(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; - struct smbd_mr *mr, *tmp; + struct smbdirect_mr_io *mr, *tmp; - disable_work_sync(&info->mr_recovery_work); - list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { - if (mr->state == MR_INVALIDATED) + disable_work_sync(&sc->mr_io.recovery_work); + list_for_each_entry_safe(mr, tmp, &sc->mr_io.all.list, list) { + if (mr->state == SMBDIRECT_MR_INVALIDATED) ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); @@ -2108,32 +2374,32 @@ static void destroy_mr_list(struct smbd_connection *info) * Recovery is done in smbd_mr_recovery_work. 
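The recovery loop above replaces a failed fast-registration MR outright, since an FRMR that completed in error cannot simply be reposted. A minimal sketch of the per-entry step (helper name is illustrative):

```c
static int recover_one_mr(struct smbdirect_socket *sc,
			  struct smbdirect_mr_io *mr)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	int rc;

	/* throw the broken MR away ... */
	rc = ib_dereg_mr(mr->mr);
	if (rc)
		return rc;

	/* ... and allocate a fresh one in its place */
	mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, sp->max_frmr_depth);
	if (IS_ERR(mr->mr)) {
		rc = PTR_ERR(mr->mr);
		mr->mr = NULL;
		return rc;
	}

	mr->state = SMBDIRECT_MR_READY;
	return 0;
}
```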
The content of list entry changes * as MRs are used and recovered for I/O, but the list links will not change */ -static int allocate_mr_list(struct smbd_connection *info) +static int allocate_mr_list(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int i; - struct smbd_mr *smbdirect_mr, *tmp; - - INIT_LIST_HEAD(&info->mr_list); - init_waitqueue_head(&info->wait_mr); - spin_lock_init(&info->mr_list_lock); - atomic_set(&info->mr_ready_count, 0); - atomic_set(&info->mr_used_count, 0); - init_waitqueue_head(&info->wait_for_mr_cleanup); - INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work); + struct smbdirect_mr_io *smbdirect_mr, *tmp; + + INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); + + if (sp->responder_resources == 0) { + log_rdma_mr(ERR, "responder_resources negotiated as 0\n"); + return -EINVAL; + } + /* Allocate more MRs (2x) than hardware responder_resources */ - for (i = 0; i < info->responder_resources * 2; i++) { + for (i = 0; i < sp->responder_resources * 2; i++) { smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); if (!smbdirect_mr) goto cleanup_entries; - smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, info->mr_type, - info->max_frmr_depth); + smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, + sp->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", - info->mr_type, info->max_frmr_depth); + sc->mr_io.type, sp->max_frmr_depth); goto out; } - smbdirect_mr->sgt.sgl = kcalloc(info->max_frmr_depth, + smbdirect_mr->sgt.sgl = kcalloc(sp->max_frmr_depth, sizeof(struct scatterlist), GFP_KERNEL); if (!smbdirect_mr->sgt.sgl) { @@ -2141,18 +2407,18 @@ static int allocate_mr_list(struct smbd_connection *info) ib_dereg_mr(smbdirect_mr->mr); goto out; } - smbdirect_mr->state = MR_READY; - smbdirect_mr->conn = info; + smbdirect_mr->state = SMBDIRECT_MR_READY; + smbdirect_mr->socket = sc; - list_add_tail(&smbdirect_mr->list, &info->mr_list); - atomic_inc(&info->mr_ready_count); + list_add_tail(&smbdirect_mr->list, &sc->mr_io.all.list); + atomic_inc(&sc->mr_io.ready.count); } return 0; out: kfree(smbdirect_mr); cleanup_entries: - list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { + list_for_each_entry_safe(smbdirect_mr, tmp, &sc->mr_io.all.list, list) { list_del(&smbdirect_mr->list); ib_dereg_mr(smbdirect_mr->mr); kfree(smbdirect_mr->sgt.sgl); @@ -2169,14 +2435,14 @@ cleanup_entries: * issuing I/O trying to get MR at the same time, mr_list_lock is used to * protect this situation. 
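Each pool entry built above pairs an MR with a preallocated scatterlist sized for max_frmr_depth pages, so the registration fast path never has to allocate. A sketch of one entry's construction (helper name is illustrative):

```c
static struct smbdirect_mr_io *
alloc_one_mr(struct smbdirect_socket *sc,
	     const struct smbdirect_socket_parameters *sp)
{
	struct smbdirect_mr_io *mr = kzalloc(sizeof(*mr), GFP_KERNEL);

	if (!mr)
		return NULL;

	mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, sp->max_frmr_depth);
	if (IS_ERR(mr->mr))
		goto free_entry;

	mr->sgt.sgl = kcalloc(sp->max_frmr_depth,
			      sizeof(struct scatterlist), GFP_KERNEL);
	if (!mr->sgt.sgl)
		goto free_mr;

	mr->state = SMBDIRECT_MR_READY;
	mr->socket = sc;
	return mr;

free_mr:
	ib_dereg_mr(mr->mr);
free_entry:
	kfree(mr);
	return NULL;
}
```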
*/ -static struct smbd_mr *get_mr(struct smbd_connection *info) +static struct smbdirect_mr_io *get_mr(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; - struct smbd_mr *ret; + struct smbdirect_mr_io *ret; + unsigned long flags; int rc; again: - rc = wait_event_interruptible(info->wait_mr, - atomic_read(&info->mr_ready_count) || + rc = wait_event_interruptible(sc->mr_io.ready.wait_queue, + atomic_read(&sc->mr_io.ready.count) || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) { log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); @@ -2188,18 +2454,18 @@ again: return NULL; } - spin_lock(&info->mr_list_lock); - list_for_each_entry(ret, &info->mr_list, list) { - if (ret->state == MR_READY) { - ret->state = MR_REGISTERED; - spin_unlock(&info->mr_list_lock); - atomic_dec(&info->mr_ready_count); - atomic_inc(&info->mr_used_count); + spin_lock_irqsave(&sc->mr_io.all.lock, flags); + list_for_each_entry(ret, &sc->mr_io.all.list, list) { + if (ret->state == SMBDIRECT_MR_READY) { + ret->state = SMBDIRECT_MR_REGISTERED; + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); + atomic_dec(&sc->mr_io.ready.count); + atomic_inc(&sc->mr_io.used.count); return ret; } } - spin_unlock(&info->mr_list_lock); + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); /* * It is possible that we could fail to get MR because other processes may * try to acquire a MR at the same time. If this is the case, retry it. @@ -2210,8 +2476,7 @@ again: /* * Transcribe the pages from an iterator into an MR scatterlist. */ -static int smbd_iter_to_mr(struct smbd_connection *info, - struct iov_iter *iter, +static int smbd_iter_to_mr(struct iov_iter *iter, struct sg_table *sgt, unsigned int max_sg) { @@ -2233,25 +2498,26 @@ static int smbd_iter_to_mr(struct smbd_connection *info, * need_invalidate: true if this MR needs to be locally invalidated after I/O * return value: the MR registered, NULL if failed. 
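get_mr() treats ready.count only as a wakeup hint: the authoritative state is the per-MR state field, so after waking it rescans the list under the lock and simply waits again if another CPU won the race. A condensed sketch of that pattern (helper name is illustrative):

```c
static struct smbdirect_mr_io *get_ready_mr(struct smbdirect_socket *sc)
{
	struct smbdirect_mr_io *mr;
	unsigned long flags;

	for (;;) {
		if (wait_event_interruptible(sc->mr_io.ready.wait_queue,
				atomic_read(&sc->mr_io.ready.count) ||
				sc->status != SMBDIRECT_SOCKET_CONNECTED))
			return NULL;	/* interrupted by a signal */
		if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
			return NULL;	/* connection went away */

		spin_lock_irqsave(&sc->mr_io.all.lock, flags);
		list_for_each_entry(mr, &sc->mr_io.all.list, list) {
			if (mr->state != SMBDIRECT_MR_READY)
				continue;
			mr->state = SMBDIRECT_MR_REGISTERED;
			spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
			atomic_dec(&sc->mr_io.ready.count);
			atomic_inc(&sc->mr_io.used.count);
			return mr;
		}
		spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
		/* lost the race to another CPU: wait again */
	}
}
```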
*/ -struct smbd_mr *smbd_register_mr(struct smbd_connection *info, +struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate) { struct smbdirect_socket *sc = &info->socket; - struct smbd_mr *smbdirect_mr; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_mr_io *smbdirect_mr; int rc, num_pages; enum dma_data_direction dir; struct ib_reg_wr *reg_wr; - num_pages = iov_iter_npages(iter, info->max_frmr_depth + 1); - if (num_pages > info->max_frmr_depth) { + num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); + if (num_pages > sp->max_frmr_depth) { log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", - num_pages, info->max_frmr_depth); + num_pages, sp->max_frmr_depth); WARN_ON_ONCE(1); return NULL; } - smbdirect_mr = get_mr(info); + smbdirect_mr = get_mr(sc); if (!smbdirect_mr) { log_rdma_mr(ERR, "get_mr returning NULL\n"); return NULL; @@ -2264,8 +2530,8 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, smbdirect_mr->sgt.orig_nents = 0; log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", - num_pages, iov_iter_count(iter), info->max_frmr_depth); - smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); + num_pages, iov_iter_count(iter), sp->max_frmr_depth); + smbd_iter_to_mr(iter, &smbdirect_mr->sgt, sp->max_frmr_depth); rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, dir); @@ -2310,32 +2576,32 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", rc, reg_wr->key); - /* If all failed, attempt to recover this MR by setting it MR_ERROR*/ + /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/ map_mr_error: ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); dma_map_error: - smbdirect_mr->state = MR_ERROR; - if (atomic_dec_and_test(&info->mr_used_count)) - wake_up(&info->wait_for_mr_cleanup); + smbdirect_mr->state = SMBDIRECT_MR_ERROR; + if (atomic_dec_and_test(&sc->mr_io.used.count)) + wake_up(&sc->mr_io.cleanup.wait_queue); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); return NULL; } static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbd_mr *smbdirect_mr; + struct smbdirect_mr_io *smbdirect_mr; struct ib_cqe *cqe; cqe = wc->wr_cqe; - smbdirect_mr = container_of(cqe, struct smbd_mr, cqe); - smbdirect_mr->state = MR_INVALIDATED; + smbdirect_mr = container_of(cqe, struct smbdirect_mr_io, cqe); + smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; if (wc->status != IB_WC_SUCCESS) { log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status); - smbdirect_mr->state = MR_ERROR; + smbdirect_mr->state = SMBDIRECT_MR_ERROR; } complete(&smbdirect_mr->invalidate_done); } @@ -2346,11 +2612,10 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) +int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) { struct ib_send_wr *wr; - struct smbd_connection *info = smbdirect_mr->conn; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = smbdirect_mr->socket; int rc = 0; if (smbdirect_mr->need_invalidate) { @@ -2367,36 +2632,36 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) rc = ib_post_send(sc->ib.qp, wr, NULL); 
if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); goto done; } wait_for_completion(&smbdirect_mr->invalidate_done); smbdirect_mr->need_invalidate = false; } else /* - * For remote invalidation, just set it to MR_INVALIDATED + * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED * and defer to mr_recovery_work to recover the MR for next use */ - smbdirect_mr->state = MR_INVALIDATED; + smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; - if (smbdirect_mr->state == MR_INVALIDATED) { + if (smbdirect_mr->state == SMBDIRECT_MR_INVALIDATED) { ib_dma_unmap_sg( sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); - smbdirect_mr->state = MR_READY; - if (atomic_inc_return(&info->mr_ready_count) == 1) - wake_up_interruptible(&info->wait_mr); + smbdirect_mr->state = SMBDIRECT_MR_READY; + if (atomic_inc_return(&sc->mr_io.ready.count) == 1) + wake_up(&sc->mr_io.ready.wait_queue); } else /* * Schedule the work to do MR recovery for future I/Os MR * recovery is slow and don't want it to block current I/O */ - queue_work(info->workqueue, &info->mr_recovery_work); + queue_work(sc->workqueue, &sc->mr_io.recovery_work); done: - if (atomic_dec_and_test(&info->mr_used_count)) - wake_up(&info->wait_for_mr_cleanup); + if (atomic_dec_and_test(&sc->mr_io.used.count)) + wake_up(&sc->mr_io.cleanup.wait_queue); return rc; } diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index e45aa9ddd71d..d67ac5ddaff4 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -27,12 +27,6 @@ extern int smbd_max_send_size; extern int smbd_send_credit_target; extern int smbd_receive_credit_max; -enum keep_alive_status { - KEEP_ALIVE_NONE, - KEEP_ALIVE_PENDING, - KEEP_ALIVE_SENT, -}; - /* * The context for the SMBDirect transport * Everything related to the transport is here. It has several logical parts @@ -44,79 +38,14 @@ enum keep_alive_status { */ struct smbd_connection { struct smbdirect_socket socket; - - int ri_rc; - struct completion ri_done; - wait_queue_head_t status_wait; - - struct completion negotiate_completion; - bool negotiate_done; - - struct work_struct disconnect_work; - struct work_struct post_send_credits_work; - - spinlock_t lock_new_credits_offered; - int new_credits_offered; - - /* dynamic connection parameters defined in [MS-SMBD] 3.1.1.1 */ - enum keep_alive_status keep_alive_requested; - int protocol; - atomic_t send_credits; - atomic_t receive_credits; - int receive_credit_target; - - /* Memory registrations */ - /* Maximum number of RDMA read/write outstanding on this connection */ - int responder_resources; - /* Maximum number of pages in a single RDMA write/read on this connection */ - int max_frmr_depth; - /* - * If payload is less than or equal to the threshold, - * use RDMA send/recv to send upper layer I/O. - * If payload is more than the threshold, - * use RDMA read/write through memory registration for I/O. 
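The deregister path above finishes a local invalidation synchronously: an IB_WR_LOCAL_INV work request is posted for the MR's rkey and the caller sleeps until local_inv_done() fires the completion from the send CQ. A self-contained sketch of that round trip (helper name is illustrative; the work-request setup is assumed, as the hunk only shows the post):

```c
static int local_invalidate(struct smbdirect_socket *sc,
			    struct smbdirect_mr_io *mr)
{
	struct ib_send_wr wr = {
		.opcode		= IB_WR_LOCAL_INV,
		.wr_cqe		= &mr->cqe,
		.send_flags	= IB_SEND_SIGNALED,
		.ex.invalidate_rkey = mr->mr->rkey,
	};
	int rc;

	mr->cqe.done = local_inv_done;	/* CQ handler completes us */
	init_completion(&mr->invalidate_done);

	rc = ib_post_send(sc->ib.qp, &wr, NULL);
	if (rc)
		return rc;

	wait_for_completion(&mr->invalidate_done);
	return 0;
}
```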
- */ - int rdma_readwrite_threshold; - enum ib_mr_type mr_type; - struct list_head mr_list; - spinlock_t mr_list_lock; - /* The number of available MRs ready for memory registration */ - atomic_t mr_ready_count; - atomic_t mr_used_count; - wait_queue_head_t wait_mr; - struct work_struct mr_recovery_work; - /* Used by transport to wait until all MRs are returned */ - wait_queue_head_t wait_for_mr_cleanup; - - /* Activity accounting */ - atomic_t send_pending; - wait_queue_head_t wait_send_pending; - wait_queue_head_t wait_post_send; - - /* Receive queue */ - int count_receive_queue; - wait_queue_head_t wait_receive_queues; - - bool send_immediate; - - wait_queue_head_t wait_send_queue; - - struct workqueue_struct *workqueue; - struct delayed_work idle_timer_work; - - /* for debug purposes */ - unsigned int count_get_receive_buffer; - unsigned int count_put_receive_buffer; - unsigned int count_reassembly_queue; - unsigned int count_enqueue_reassembly_queue; - unsigned int count_dequeue_reassembly_queue; - unsigned int count_send_empty; }; /* Create a SMBDirect session */ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); +const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn); + /* Reconnect SMBDirect session */ int smbd_reconnect(struct TCP_Server_Info *server); /* Destroy SMBDirect session */ @@ -127,34 +56,11 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg); int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst); -enum mr_state { - MR_READY, - MR_REGISTERED, - MR_INVALIDATED, - MR_ERROR -}; - -struct smbd_mr { - struct smbd_connection *conn; - struct list_head list; - enum mr_state state; - struct ib_mr *mr; - struct sg_table sgt; - enum dma_data_direction dir; - union { - struct ib_reg_wr wr; - struct ib_send_wr inv_wr; - }; - struct ib_cqe cqe; - bool need_invalidate; - struct completion invalidate_done; -}; - /* Interfaces to register and deregister MR for RDMA read/write */ -struct smbd_mr *smbd_register_mr( +struct smbdirect_mr_io *smbd_register_mr( struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate); -int smbd_deregister_mr(struct smbd_mr *mr); +int smbd_deregister_mr(struct smbdirect_mr_io *mr); #else #define cifs_rdma_enabled(server) 0 diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h index b9a385344ff3..05cc6a9d0ccd 100644 --- a/fs/smb/common/smbdirect/smbdirect.h +++ b/fs/smb/common/smbdirect/smbdirect.h @@ -23,6 +23,12 @@ struct smbdirect_buffer_descriptor_v1 { * Some values are important for the upper layer. 
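The header now only declares smbd_get_parameters(); a minimal accessor consistent with the declaration could look like this (sketch, the body is not part of this hunk):

```c
const struct smbdirect_socket_parameters *
smbd_get_parameters(struct smbd_connection *conn)
{
	struct smbdirect_socket *sc = &conn->socket;

	return &sc->parameters;
}
```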
*/ struct smbdirect_socket_parameters { + __u32 resolve_addr_timeout_msec; + __u32 resolve_route_timeout_msec; + __u32 rdma_connect_timeout_msec; + __u32 negotiate_timeout_msec; + __u8 initiator_depth; + __u8 responder_resources; __u16 recv_credit_max; __u16 send_credit_target; __u32 max_send_size; @@ -30,6 +36,7 @@ struct smbdirect_socket_parameters { __u32 max_recv_size; __u32 max_fragmented_recv_size; __u32 max_read_write_size; + __u32 max_frmr_depth; __u32 keepalive_interval_msec; __u32 keepalive_timeout_msec; } __packed; diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 3c4a8d627aa3..db22a1d0546b 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -6,22 +6,102 @@ #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ +#include <rdma/rw.h> + enum smbdirect_socket_status { SMBDIRECT_SOCKET_CREATED, - SMBDIRECT_SOCKET_CONNECTING, - SMBDIRECT_SOCKET_CONNECTED, + SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED, + SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, + SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED, + SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED, + SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING, + SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED, + SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED, + SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING, + SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED, + SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, + SMBDIRECT_SOCKET_NEGOTIATE_RUNNING, SMBDIRECT_SOCKET_NEGOTIATE_FAILED, + SMBDIRECT_SOCKET_CONNECTED, + SMBDIRECT_SOCKET_ERROR, SMBDIRECT_SOCKET_DISCONNECTING, SMBDIRECT_SOCKET_DISCONNECTED, SMBDIRECT_SOCKET_DESTROYED }; +static __always_inline +const char *smbdirect_socket_status_string(enum smbdirect_socket_status status) +{ + switch (status) { + case SMBDIRECT_SOCKET_CREATED: + return "CREATED"; + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + return "RESOLVE_ADDR_NEEDED"; + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + return "RESOLVE_ADDR_RUNNING"; + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + return "RESOLVE_ADDR_FAILED"; + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + return "RESOLVE_ROUTE_NEEDED"; + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + return "RESOLVE_ROUTE_RUNNING"; + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + return "RESOLVE_ROUTE_FAILED"; + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + return "RDMA_CONNECT_NEEDED"; + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + return "RDMA_CONNECT_RUNNING"; + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + return "RDMA_CONNECT_FAILED"; + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + return "NEGOTIATE_NEEDED"; + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + return "NEGOTIATE_RUNNING"; + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + return "NEGOTIATE_FAILED"; + case SMBDIRECT_SOCKET_CONNECTED: + return "CONNECTED"; + case SMBDIRECT_SOCKET_ERROR: + return "ERROR"; + case SMBDIRECT_SOCKET_DISCONNECTING: + return "DISCONNECTING"; + case SMBDIRECT_SOCKET_DISCONNECTED: + return "DISCONNECTED"; + case SMBDIRECT_SOCKET_DESTROYED: + return "DESTROYED"; + } + + return "<unknown>"; +} + +enum smbdirect_keepalive_status { + SMBDIRECT_KEEPALIVE_NONE, + SMBDIRECT_KEEPALIVE_PENDING, + SMBDIRECT_KEEPALIVE_SENT +}; + struct smbdirect_socket { enum smbdirect_socket_status status; + wait_queue_head_t status_wait; + int first_error; + + /* + * This points to the workqueue to + * be used for this socket. 
+ * It can be per socket (on the client) + * or point to a global workqueue (on the server) + */ + struct workqueue_struct *workqueue; + + struct work_struct disconnect_work; /* RDMA related */ struct { struct rdma_cm_id *cm_id; + /* + * This is for iWarp MPA v1 + */ + bool legacy_iwarp; } rdma; /* IB verbs related */ @@ -40,6 +120,15 @@ struct smbdirect_socket { struct smbdirect_socket_parameters parameters; /* + * The state for keepalive and timeout handling + */ + struct { + enum smbdirect_keepalive_status keepalive; + struct work_struct immediate_work; + struct delayed_work timer_work; + } idle; + + /* * The state for posted send buffers */ struct { @@ -51,6 +140,29 @@ struct smbdirect_socket { struct kmem_cache *cache; mempool_t *pool; } mem; + + /* + * The credit state for the send side + */ + struct { + atomic_t count; + wait_queue_head_t wait_queue; + } credits; + + /* + * The state about posted/pending sends + */ + struct { + atomic_t count; + /* + * woken when count is decremented + */ + wait_queue_head_t dec_wait_queue; + /* + * woken when count reached zero + */ + wait_queue_head_t zero_wait_queue; + } pending; } send_io; /* @@ -85,6 +197,23 @@ struct smbdirect_socket { } free; /* + * The state for posted recv_io messages + * and the refill work struct. + */ + struct { + atomic_t count; + struct work_struct refill_work; + } posted; + + /* + * The credit state for the recv side + */ + struct { + u16 target; + atomic_t count; + } credits; + + /* * The list of arrived non-empty smbdirect_recv_io * structures * @@ -110,8 +239,137 @@ struct smbdirect_socket { bool full_packet_received; } reassembly; } recv_io; + + /* + * The state for Memory registrations on the client + */ + struct { + enum ib_mr_type type; + + /* + * The list of free smbdirect_mr_io + * structures + */ + struct { + struct list_head list; + spinlock_t lock; + } all; + + /* + * The number of available MRs ready for memory registration + */ + struct { + atomic_t count; + wait_queue_head_t wait_queue; + } ready; + + /* + * The number of used MRs + */ + struct { + atomic_t count; + } used; + + struct work_struct recovery_work; + + /* Used by transport to wait until all MRs are returned */ + struct { + wait_queue_head_t wait_queue; + } cleanup; + } mr_io; + + /* + * The state for RDMA read/write requests on the server + */ + struct { + /* + * The credit state for the send side + */ + struct { + /* + * The maximum number of rw credits + */ + size_t max; + /* + * The number of pages per credit + */ + size_t num_pages; + atomic_t count; + wait_queue_head_t wait_queue; + } credits; + } rw_io; + + /* + * For debug purposes + */ + struct { + u64 get_receive_buffer; + u64 put_receive_buffer; + u64 enqueue_reassembly_queue; + u64 dequeue_reassembly_queue; + u64 send_empty; + } statistics; }; +static void __smbdirect_socket_disabled_work(struct work_struct *work) +{ + /* + * Should never be called as disable_[delayed_]work_sync() was used. 
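smbdirect_socket_init() below parks every work item on this WARN_ON_ONCE() stub and immediately disables it, so a stray queue_work() before a transport installs its real handler is a harmless no-op. A sketch of how a transport later takes ownership (hypothetical helper; the assumption is that re-running INIT_WORK() clears the disabled state along with the rest of the item, which is how both transports in this series use it):

```c
static void adopt_disconnect_work(struct smbdirect_socket *sc,
				  work_func_t real_handler)
{
	/*
	 * INIT_WORK() re-initializes the item from scratch, which also
	 * drops the disable count set by disable_work_sync(), so the
	 * item becomes queueable again with the real handler in place.
	 */
	INIT_WORK(&sc->disconnect_work, real_handler);
}
```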
+ */ + WARN_ON_ONCE(1); +} + +static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) +{ + /* + * This also sets status = SMBDIRECT_SOCKET_CREATED + */ + BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0); + memset(sc, 0, sizeof(*sc)); + + init_waitqueue_head(&sc->status_wait); + + INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); + disable_work_sync(&sc->disconnect_work); + + INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work); + disable_work_sync(&sc->idle.immediate_work); + INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); + disable_delayed_work_sync(&sc->idle.timer_work); + + atomic_set(&sc->send_io.credits.count, 0); + init_waitqueue_head(&sc->send_io.credits.wait_queue); + + atomic_set(&sc->send_io.pending.count, 0); + init_waitqueue_head(&sc->send_io.pending.dec_wait_queue); + init_waitqueue_head(&sc->send_io.pending.zero_wait_queue); + + INIT_LIST_HEAD(&sc->recv_io.free.list); + spin_lock_init(&sc->recv_io.free.lock); + + atomic_set(&sc->recv_io.posted.count, 0); + INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work); + disable_work_sync(&sc->recv_io.posted.refill_work); + + atomic_set(&sc->recv_io.credits.count, 0); + + INIT_LIST_HEAD(&sc->recv_io.reassembly.list); + spin_lock_init(&sc->recv_io.reassembly.lock); + init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); + + atomic_set(&sc->rw_io.credits.count, 0); + init_waitqueue_head(&sc->rw_io.credits.wait_queue); + + spin_lock_init(&sc->mr_io.all.lock); + INIT_LIST_HEAD(&sc->mr_io.all.list); + atomic_set(&sc->mr_io.ready.count, 0); + init_waitqueue_head(&sc->mr_io.ready.wait_queue); + atomic_set(&sc->mr_io.used.count, 0); + INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work); + disable_work_sync(&sc->mr_io.recovery_work); + init_waitqueue_head(&sc->mr_io.cleanup.wait_queue); +} + struct smbdirect_send_io { struct smbdirect_socket *socket; struct ib_cqe cqe; @@ -136,6 +394,23 @@ struct smbdirect_send_io { u8 packet[]; }; +struct smbdirect_send_batch { + /* + * List of smbdirect_send_io messages + */ + struct list_head msg_list; + /* + * Number of list entries + */ + size_t wr_cnt; + + /* + * Possible remote key invalidation state + */ + bool need_invalidate_rkey; + u32 remote_key; +}; + struct smbdirect_recv_io { struct smbdirect_socket *socket; struct ib_cqe cqe; @@ -158,4 +433,44 @@ struct smbdirect_recv_io { u8 packet[]; }; +enum smbdirect_mr_state { + SMBDIRECT_MR_READY, + SMBDIRECT_MR_REGISTERED, + SMBDIRECT_MR_INVALIDATED, + SMBDIRECT_MR_ERROR +}; + +struct smbdirect_mr_io { + struct smbdirect_socket *socket; + struct ib_cqe cqe; + + struct list_head list; + + enum smbdirect_mr_state state; + struct ib_mr *mr; + struct sg_table sgt; + enum dma_data_direction dir; + union { + struct ib_reg_wr wr; + struct ib_send_wr inv_wr; + }; + + bool need_invalidate; + struct completion invalidate_done; +}; + +struct smbdirect_rw_io { + struct smbdirect_socket *socket; + struct ib_cqe cqe; + + struct list_head list; + + int error; + struct completion *completion; + + struct rdma_rw_ctx rdma_ctx; + struct sg_table sgt; + struct scatterlist sg_list[]; +}; + #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 67c4f73398df..91a934411134 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -243,7 +243,7 @@ int ksmbd_conn_write(struct ksmbd_work *work) int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf, unsigned 
int buflen, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len) { int ret = -EINVAL; @@ -257,7 +257,7 @@ int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf, unsigned int buflen, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len) { int ret = -EINVAL; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 2aa8084bb593..07b43634262a 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -19,6 +19,8 @@ #include "smb_common.h" #include "ksmbd_work.h" +struct smbdirect_buffer_descriptor_v1; + #define KSMBD_SOCKET_BACKLOG 16 enum { @@ -133,11 +135,11 @@ struct ksmbd_transport_ops { unsigned int remote_key); int (*rdma_read)(struct ksmbd_transport *t, void *buf, unsigned int len, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len); int (*rdma_write)(struct ksmbd_transport *t, void *buf, unsigned int len, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len); void (*free_transport)(struct ksmbd_transport *kt); }; @@ -163,11 +165,11 @@ bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); int ksmbd_conn_write(struct ksmbd_work *work); int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf, unsigned int buflen, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len); int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf, unsigned int buflen, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len); void ksmbd_conn_enqueue_request(struct ksmbd_work *work); void ksmbd_conn_try_dequeue_request(struct ksmbd_work *work); diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 8c9c49c3a0a4..40420544cc25 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -365,6 +365,7 @@ static void server_ctrl_handle_init(struct server_ctrl_struct *ctrl) return; } + pr_info("running\n"); WRITE_ONCE(server_conf.state, SERVER_STATE_RUNNING); } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index a565fc36cee6..0c069eff80b7 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -23,6 +23,7 @@ #include "asn1.h" #include "connection.h" #include "transport_ipc.h" +#include "../common/smbdirect/smbdirect.h" #include "transport_rdma.h" #include "vfs.h" #include "vfs_cache.h" @@ -6665,7 +6666,7 @@ out: } static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, __le32 Channel, __le16 ChannelInfoLength) { @@ -6701,7 +6702,7 @@ static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work, int err; err = ksmbd_conn_rdma_write(work->conn, data_buf, length, - (struct smb2_buffer_desc_v1 *) + (struct smbdirect_buffer_descriptor_v1 *) ((char *)req + le16_to_cpu(req->ReadChannelInfoOffset)), le16_to_cpu(req->ReadChannelInfoLength)); if (err) @@ -6761,7 +6762,11 @@ int smb2_read(struct ksmbd_work *work) if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE || req->Channel == SMB2_CHANNEL_RDMA_V1) { is_rdma_channel = true; - max_read_size = get_smbd_max_read_write_size(); + max_read_size = get_smbd_max_read_write_size(work->conn->transport); + if (max_read_size == 0) { + err = -EINVAL; + goto out; + } } if (is_rdma_channel == true) { @@ -6772,7 +6777,7 @@ int smb2_read(struct 
ksmbd_work *work) goto out; } err = smb2_set_remote_key_for_rdma(work, - (struct smb2_buffer_desc_v1 *) + (struct smbdirect_buffer_descriptor_v1 *) ((char *)req + ch_offset), req->Channel, req->ReadChannelInfoLength); @@ -6967,7 +6972,7 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work, return -ENOMEM; ret = ksmbd_conn_rdma_read(work->conn, data_buf, length, - (struct smb2_buffer_desc_v1 *) + (struct smbdirect_buffer_descriptor_v1 *) ((char *)req + le16_to_cpu(req->WriteChannelInfoOffset)), le16_to_cpu(req->WriteChannelInfoLength)); if (ret < 0) { @@ -7019,7 +7024,11 @@ int smb2_write(struct ksmbd_work *work) if (req->Channel == SMB2_CHANNEL_RDMA_V1 || req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) { is_rdma_channel = true; - max_write_size = get_smbd_max_read_write_size(); + max_write_size = get_smbd_max_read_write_size(work->conn->transport); + if (max_write_size == 0) { + err = -EINVAL; + goto out; + } length = le32_to_cpu(req->RemainingBytes); } @@ -7032,7 +7041,7 @@ int smb2_write(struct ksmbd_work *work) goto out; } err = smb2_set_remote_key_for_rdma(work, - (struct smb2_buffer_desc_v1 *) + (struct smbdirect_buffer_descriptor_v1 *) ((char *)req + ch_offset), req->Channel, req->WriteChannelInfoLength); diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 16ae8a10490b..5163d5241b90 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h @@ -136,12 +136,6 @@ struct create_posix_rsp { u8 SidBuffer[44]; } __packed; -struct smb2_buffer_desc_v1 { - __le64 offset; - __le32 token; - __le32 length; -} __packed; - #define SMB2_0_IOCTL_IS_FSCTL 0x00000001 struct smb_sockaddr_in { diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 5df138fc6004..9e644a0daf1c 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -23,18 +23,24 @@ #include "connection.h" #include "smb_common.h" #include "../common/smb2status.h" +#include "../common/smbdirect/smbdirect.h" +#include "../common/smbdirect/smbdirect_pdu.h" +#include "../common/smbdirect/smbdirect_socket.h" #include "transport_rdma.h" #define SMB_DIRECT_PORT_IWARP 5445 #define SMB_DIRECT_PORT_INFINIBAND 445 -#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100) +#define SMB_DIRECT_VERSION_LE cpu_to_le16(SMBDIRECT_V1) -/* SMB_DIRECT negotiation timeout in seconds */ -#define SMB_DIRECT_NEGOTIATE_TIMEOUT 120 +/* SMB_DIRECT negotiation timeout (for the server) in seconds */ +#define SMB_DIRECT_NEGOTIATE_TIMEOUT 5 -#define SMB_DIRECT_MAX_SEND_SGES 6 -#define SMB_DIRECT_MAX_RECV_SGES 1 +/* The interval for sending keepalive messages to the peer in seconds */ +#define SMB_DIRECT_KEEPALIVE_SEND_INTERVAL 120 + +/* The timeout to wait for a keepalive response from the peer in seconds */ +#define SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT 5 /* * Default maximum number of RDMA read/write outstanding on this connection @@ -87,131 +93,38 @@ static struct smb_direct_listener { static struct workqueue_struct *smb_direct_wq; -enum smb_direct_status { - SMB_DIRECT_CS_NEW = 0, - SMB_DIRECT_CS_CONNECTED, - SMB_DIRECT_CS_DISCONNECTING, - SMB_DIRECT_CS_DISCONNECTED, -}; struct smb_direct_transport { struct ksmbd_transport transport; - enum smb_direct_status status; - bool full_packet_received; - wait_queue_head_t wait_status; - - struct rdma_cm_id *cm_id; - struct ib_cq *send_cq; - struct ib_cq *recv_cq; - struct ib_pd *pd; - struct ib_qp *qp; - - int max_send_size; - int max_recv_size; - int max_fragmented_send_size; - int max_fragmented_recv_size; - int max_rdma_rw_size; - - spinlock_t
reassembly_queue_lock; - struct list_head reassembly_queue; - int reassembly_data_length; - int reassembly_queue_length; - int first_entry_offset; - wait_queue_head_t wait_reassembly_queue; - - spinlock_t receive_credit_lock; - int recv_credits; - int count_avail_recvmsg; - int recv_credit_max; - int recv_credit_target; - - spinlock_t recvmsg_queue_lock; - struct list_head recvmsg_queue; - - int send_credit_target; - atomic_t send_credits; - spinlock_t lock_new_recv_credits; - int new_recv_credits; - int max_rw_credits; - int pages_per_rw_credit; - atomic_t rw_credits; - - wait_queue_head_t wait_send_credits; - wait_queue_head_t wait_rw_credits; - - mempool_t *sendmsg_mempool; - struct kmem_cache *sendmsg_cache; - mempool_t *recvmsg_mempool; - struct kmem_cache *recvmsg_cache; - - wait_queue_head_t wait_send_pending; - atomic_t send_pending; - - struct work_struct post_recv_credits_work; - struct work_struct send_immediate_work; - struct work_struct disconnect_work; - - bool negotiation_requested; + struct smbdirect_socket socket; }; -#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) -#define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ +#define KSMBD_TRANS(t) (&(t)->transport) +#define SMBD_TRANS(t) (container_of(t, \ struct smb_direct_transport, transport)) -enum { - SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, - SMB_DIRECT_MSG_DATA_TRANSFER -}; static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; -struct smb_direct_send_ctx { - struct list_head msg_list; - int wr_cnt; - bool need_invalidate_rkey; - unsigned int remote_key; -}; - -struct smb_direct_sendmsg { - struct smb_direct_transport *transport; - struct ib_send_wr wr; - struct list_head list; - int num_sge; - struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES]; - struct ib_cqe cqe; - u8 packet[]; -}; - -struct smb_direct_recvmsg { - struct smb_direct_transport *transport; - struct list_head list; - int type; - struct ib_sge sge; - struct ib_cqe cqe; - bool first_segment; - u8 packet[]; -}; - -struct smb_direct_rdma_rw_msg { - struct smb_direct_transport *t; - struct ib_cqe cqe; - int status; - struct completion *completion; - struct list_head list; - struct rdma_rw_ctx rw_ctx; - struct sg_table sgt; - struct scatterlist sg_list[]; -}; - void init_smbd_max_io_size(unsigned int sz) { sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); smb_direct_max_read_write_size = sz; } -unsigned int get_smbd_max_read_write_size(void) +unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { - return smb_direct_max_read_write_size; + struct smb_direct_transport *t; + struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; + + if (kt->ops != &ksmbd_smb_direct_transport_ops) + return 0; + + t = SMBD_TRANS(kt); + sc = &t->socket; + sp = &sc->parameters; + + return sp->max_read_write_size; } static inline int get_buf_page_count(void *buf, int size) @@ -220,71 +133,65 @@ static inline int get_buf_page_count(void *buf, int size) (uintptr_t)buf / PAGE_SIZE; } -static void smb_direct_destroy_pools(struct smb_direct_transport *transport); +static void smb_direct_destroy_pools(struct smbdirect_socket *sc); static void smb_direct_post_recv_credits(struct work_struct *work); -static int smb_direct_post_send_data(struct smb_direct_transport *t, - struct smb_direct_send_ctx *send_ctx, +static int smb_direct_post_send_data(struct smbdirect_socket *sc, + struct smbdirect_send_batch *send_ctx, struct kvec *iov, int niov, int remaining_data_length); -static inline struct smb_direct_transport * 
-smb_trans_direct_transfort(struct ksmbd_transport *t) -{ - return container_of(t, struct smb_direct_transport, transport); -} - static inline void -*smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg) +*smbdirect_recv_io_payload(struct smbdirect_recv_io *recvmsg) { return (void *)recvmsg->packet; } -static inline bool is_receive_credit_post_required(int receive_credits, - int avail_recvmsg_count) -{ - return receive_credits <= (smb_direct_receive_credit_max >> 3) && - avail_recvmsg_count >= (receive_credits >> 2); -} - static struct -smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) +smbdirect_recv_io *get_free_recvmsg(struct smbdirect_socket *sc) { - struct smb_direct_recvmsg *recvmsg = NULL; + struct smbdirect_recv_io *recvmsg = NULL; + unsigned long flags; - spin_lock(&t->recvmsg_queue_lock); - if (!list_empty(&t->recvmsg_queue)) { - recvmsg = list_first_entry(&t->recvmsg_queue, - struct smb_direct_recvmsg, + spin_lock_irqsave(&sc->recv_io.free.lock, flags); + if (!list_empty(&sc->recv_io.free.list)) { + recvmsg = list_first_entry(&sc->recv_io.free.list, + struct smbdirect_recv_io, list); list_del(&recvmsg->list); } - spin_unlock(&t->recvmsg_queue_lock); + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); return recvmsg; } -static void put_recvmsg(struct smb_direct_transport *t, - struct smb_direct_recvmsg *recvmsg) +static void put_recvmsg(struct smbdirect_socket *sc, + struct smbdirect_recv_io *recvmsg) { + unsigned long flags; + if (likely(recvmsg->sge.length != 0)) { - ib_dma_unmap_single(t->cm_id->device, + ib_dma_unmap_single(sc->ib.dev, recvmsg->sge.addr, recvmsg->sge.length, DMA_FROM_DEVICE); recvmsg->sge.length = 0; } - spin_lock(&t->recvmsg_queue_lock); - list_add(&recvmsg->list, &t->recvmsg_queue); - spin_unlock(&t->recvmsg_queue_lock); + spin_lock_irqsave(&sc->recv_io.free.lock, flags); + list_add(&recvmsg->list, &sc->recv_io.free.list); + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); + + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); } -static void enqueue_reassembly(struct smb_direct_transport *t, - struct smb_direct_recvmsg *recvmsg, +static void enqueue_reassembly(struct smbdirect_socket *sc, + struct smbdirect_recv_io *recvmsg, int data_length) { - spin_lock(&t->reassembly_queue_lock); - list_add_tail(&recvmsg->list, &t->reassembly_queue); - t->reassembly_queue_length++; + unsigned long flags; + + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + list_add_tail(&recvmsg->list, &sc->recv_io.reassembly.list); + sc->recv_io.reassembly.queue_length++; /* * Make sure reassembly_data_length is updated after list and * reassembly_queue_length are updated. 
On the dequeue side @@ -292,85 +199,228 @@ static void enqueue_reassembly(struct smb_direct_transport *t, * if reassembly_queue_length and list is up to date */ virt_wmb(); - t->reassembly_data_length += data_length; - spin_unlock(&t->reassembly_queue_lock); + sc->recv_io.reassembly.data_length += data_length; + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); } -static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t) +static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *sc) { - if (!list_empty(&t->reassembly_queue)) - return list_first_entry(&t->reassembly_queue, - struct smb_direct_recvmsg, list); + if (!list_empty(&sc->recv_io.reassembly.list)) + return list_first_entry(&sc->recv_io.reassembly.list, + struct smbdirect_recv_io, list); else return NULL; } +static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc) +{ + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + */ + wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.credits.wait_queue); + wake_up_all(&sc->send_io.pending.zero_wait_queue); + wake_up_all(&sc->recv_io.reassembly.wait_queue); + wake_up_all(&sc->rw_io.credits.wait_queue); +} + static void smb_direct_disconnect_rdma_work(struct work_struct *work) { - struct smb_direct_transport *t = - container_of(work, struct smb_direct_transport, - disconnect_work); + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, disconnect_work); + + /* + * make sure this and other work is not queued again + * but here we don't block and avoid + * disable[_delayed]_work_sync() + */ + disable_work(&sc->disconnect_work); + disable_work(&sc->recv_io.posted.refill_work); + disable_delayed_work(&sc->idle.timer_work); + disable_work(&sc->idle.immediate_work); + + if (sc->first_error == 0) + sc->first_error = -ECONNABORTED; + + switch (sc->status) { + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + case SMBDIRECT_SOCKET_CONNECTED: + case SMBDIRECT_SOCKET_ERROR: + sc->status = SMBDIRECT_SOCKET_DISCONNECTING; + rdma_disconnect(sc->rdma.cm_id); + break; + + case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + /* + * rdma_accept() never reached + * RDMA_CM_EVENT_ESTABLISHED + */ + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; - if (t->status == SMB_DIRECT_CS_CONNECTED) { - t->status = SMB_DIRECT_CS_DISCONNECTING; - rdma_disconnect(t->cm_id); + case SMBDIRECT_SOCKET_DISCONNECTING: + case SMBDIRECT_SOCKET_DISCONNECTED: + case SMBDIRECT_SOCKET_DESTROYED: + break; } + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. 
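enqueue_reassembly() above relies on that write barrier so the receive path can test data_length without taking the reassembly lock. A sketch of the producer/consumer pairing (helper names are illustrative):

```c
/* producer side, as in enqueue_reassembly() */
static void publish_recv(struct smbdirect_socket *sc,
			 struct smbdirect_recv_io *recvmsg, int len)
{
	unsigned long flags;

	spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
	list_add_tail(&recvmsg->list, &sc->recv_io.reassembly.list);
	sc->recv_io.reassembly.queue_length++;
	virt_wmb();	/* entry and queue_length visible before length */
	sc->recv_io.reassembly.data_length += len;
	spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
}

/* consumer side: a lockless length check is safe only because of
 * the pairing read barrier
 */
static bool data_ready(struct smbdirect_socket *sc, int size)
{
	if (sc->recv_io.reassembly.data_length < size)
		return false;
	virt_rmb();	/* pairs with virt_wmb() in publish_recv() */
	return true;
}
```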
+ */ + smb_direct_disconnect_wake_up_all(sc); } static void -smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t) +smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) { - if (t->status == SMB_DIRECT_CS_CONNECTED) - queue_work(smb_direct_wq, &t->disconnect_work); + /* + * make sure other work (than disconnect_work) is + * not queued again but here we don't block and avoid + * disable[_delayed]_work_sync() + */ + disable_work(&sc->recv_io.posted.refill_work); + disable_work(&sc->idle.immediate_work); + disable_delayed_work(&sc->idle.timer_work); + + if (sc->first_error == 0) + sc->first_error = -ECONNABORTED; + + switch (sc->status) { + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + case SMBDIRECT_SOCKET_ERROR: + case SMBDIRECT_SOCKET_DISCONNECTING: + case SMBDIRECT_SOCKET_DISCONNECTED: + case SMBDIRECT_SOCKET_DESTROYED: + /* + * Keep the current error status + */ + break; + + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; + break; + + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; + break; + + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; + break; + + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; + break; + + case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_CONNECTED: + sc->status = SMBDIRECT_SOCKET_ERROR; + break; + } + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. 
+ */ + smb_direct_disconnect_wake_up_all(sc); + + queue_work(sc->workqueue, &sc->disconnect_work); } static void smb_direct_send_immediate_work(struct work_struct *work) { - struct smb_direct_transport *t = container_of(work, - struct smb_direct_transport, send_immediate_work); + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, idle.immediate_work); - if (t->status != SMB_DIRECT_CS_CONNECTED) + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return; - smb_direct_post_send_data(t, NULL, NULL, 0, 0); + smb_direct_post_send_data(sc, NULL, NULL, 0, 0); +} + +static void smb_direct_idle_connection_timer(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, idle.timer_work.work); + struct smbdirect_socket_parameters *sp = &sc->parameters; + + if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { + smb_direct_disconnect_rdma_connection(sc); + return; + } + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + return; + + /* + * Now use the keepalive timeout (instead of keepalive interval) + * in order to wait for a response + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_timeout_msec)); + queue_work(sc->workqueue, &sc->idle.immediate_work); } static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) { struct smb_direct_transport *t; + struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; struct ksmbd_conn *conn; t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP); if (!t) return NULL; + sc = &t->socket; + smbdirect_socket_init(sc); + sp = &sc->parameters; - t->cm_id = cm_id; - cm_id->context = t; + sc->workqueue = smb_direct_wq; - t->status = SMB_DIRECT_CS_NEW; - init_waitqueue_head(&t->wait_status); + INIT_WORK(&sc->disconnect_work, smb_direct_disconnect_rdma_work); - spin_lock_init(&t->reassembly_queue_lock); - INIT_LIST_HEAD(&t->reassembly_queue); - t->reassembly_data_length = 0; - t->reassembly_queue_length = 0; - init_waitqueue_head(&t->wait_reassembly_queue); - init_waitqueue_head(&t->wait_send_credits); - init_waitqueue_head(&t->wait_rw_credits); + sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000; + sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH; + sp->responder_resources = 1; + sp->recv_credit_max = smb_direct_receive_credit_max; + sp->send_credit_target = smb_direct_send_credit_target; + sp->max_send_size = smb_direct_max_send_size; + sp->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; + sp->max_recv_size = smb_direct_max_receive_size; + sp->max_read_write_size = smb_direct_max_read_write_size; + sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000; + sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000; - spin_lock_init(&t->receive_credit_lock); - spin_lock_init(&t->recvmsg_queue_lock); - INIT_LIST_HEAD(&t->recvmsg_queue); + sc->rdma.cm_id = cm_id; + cm_id->context = sc; - init_waitqueue_head(&t->wait_send_pending); - atomic_set(&t->send_pending, 0); + sc->ib.dev = sc->rdma.cm_id->device; - spin_lock_init(&t->lock_new_recv_credits); - - INIT_WORK(&t->post_recv_credits_work, + INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits); - INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work); - INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work); + INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); + INIT_DELAYED_WORK(&sc->idle.timer_work, 
smb_direct_idle_connection_timer); conn = ksmbd_conn_alloc(); if (!conn) @@ -391,89 +441,104 @@ static void smb_direct_free_transport(struct ksmbd_transport *kt) static void free_transport(struct smb_direct_transport *t) { - struct smb_direct_recvmsg *recvmsg; + struct smbdirect_socket *sc = &t->socket; + struct smbdirect_recv_io *recvmsg; - wake_up_interruptible(&t->wait_send_credits); + disable_work_sync(&sc->disconnect_work); + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + smb_direct_disconnect_rdma_work(&sc->disconnect_work); + wait_event_interruptible(sc->status_wait, + sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + } - ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); - wait_event(t->wait_send_pending, - atomic_read(&t->send_pending) == 0); + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + * + * Most likely this was already called via + * smb_direct_disconnect_rdma_work(), but call it again... + */ + smb_direct_disconnect_wake_up_all(sc); - disable_work_sync(&t->disconnect_work); - disable_work_sync(&t->post_recv_credits_work); - disable_work_sync(&t->send_immediate_work); + disable_work_sync(&sc->recv_io.posted.refill_work); + disable_delayed_work_sync(&sc->idle.timer_work); + disable_work_sync(&sc->idle.immediate_work); - if (t->qp) { - ib_drain_qp(t->qp); - ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); - t->qp = NULL; - rdma_destroy_qp(t->cm_id); + if (sc->ib.qp) { + ib_drain_qp(sc->ib.qp); + ib_mr_pool_destroy(sc->ib.qp, &sc->ib.qp->rdma_mrs); + sc->ib.qp = NULL; + rdma_destroy_qp(sc->rdma.cm_id); } ksmbd_debug(RDMA, "drain the reassembly queue\n"); do { - spin_lock(&t->reassembly_queue_lock); - recvmsg = get_first_reassembly(t); + unsigned long flags; + + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + recvmsg = get_first_reassembly(sc); if (recvmsg) { list_del(&recvmsg->list); - spin_unlock(&t->reassembly_queue_lock); - put_recvmsg(t, recvmsg); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + put_recvmsg(sc, recvmsg); } else { - spin_unlock(&t->reassembly_queue_lock); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); } } while (recvmsg); - t->reassembly_data_length = 0; - - if (t->send_cq) - ib_free_cq(t->send_cq); - if (t->recv_cq) - ib_free_cq(t->recv_cq); - if (t->pd) - ib_dealloc_pd(t->pd); - if (t->cm_id) - rdma_destroy_id(t->cm_id); - - smb_direct_destroy_pools(t); + sc->recv_io.reassembly.data_length = 0; + + if (sc->ib.send_cq) + ib_free_cq(sc->ib.send_cq); + if (sc->ib.recv_cq) + ib_free_cq(sc->ib.recv_cq); + if (sc->ib.pd) + ib_dealloc_pd(sc->ib.pd); + if (sc->rdma.cm_id) + rdma_destroy_id(sc->rdma.cm_id); + + smb_direct_destroy_pools(sc); ksmbd_conn_free(KSMBD_TRANS(t)->conn); } -static struct smb_direct_sendmsg -*smb_direct_alloc_sendmsg(struct smb_direct_transport *t) +static struct smbdirect_send_io +*smb_direct_alloc_sendmsg(struct smbdirect_socket *sc) { - struct smb_direct_sendmsg *msg; + struct smbdirect_send_io *msg; - msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP); + msg = mempool_alloc(sc->send_io.mem.pool, KSMBD_DEFAULT_GFP); if (!msg) return ERR_PTR(-ENOMEM); - msg->transport = t; - INIT_LIST_HEAD(&msg->list); + msg->socket = sc; + INIT_LIST_HEAD(&msg->sibling_list); msg->num_sge = 0; return msg; } -static void smb_direct_free_sendmsg(struct smb_direct_transport *t, - struct smb_direct_sendmsg *msg) +static void smb_direct_free_sendmsg(struct smbdirect_socket *sc, + struct smbdirect_send_io *msg) { int i; if (msg->num_sge > 0) { - 
ib_dma_unmap_single(t->cm_id->device, + ib_dma_unmap_single(sc->ib.dev, msg->sge[0].addr, msg->sge[0].length, DMA_TO_DEVICE); for (i = 1; i < msg->num_sge; i++) - ib_dma_unmap_page(t->cm_id->device, + ib_dma_unmap_page(sc->ib.dev, msg->sge[i].addr, msg->sge[i].length, DMA_TO_DEVICE); } - mempool_free(msg, t->sendmsg_mempool); + mempool_free(msg, sc->send_io.mem.pool); } -static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) +static int smb_direct_check_recvmsg(struct smbdirect_recv_io *recvmsg) { - switch (recvmsg->type) { - case SMB_DIRECT_MSG_DATA_TRANSFER: { - struct smb_direct_data_transfer *req = - (struct smb_direct_data_transfer *)recvmsg->packet; + struct smbdirect_socket *sc = recvmsg->socket; + + switch (sc->recv_io.expected) { + case SMBDIRECT_EXPECT_DATA_TRANSFER: { + struct smbdirect_data_transfer *req = + (struct smbdirect_data_transfer *)recvmsg->packet; struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet + le32_to_cpu(req->data_offset)); ksmbd_debug(RDMA, @@ -482,11 +547,11 @@ static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) le16_to_cpu(req->credits_requested), req->data_length, req->remaining_data_length, hdr->ProtocolId, hdr->Command); - break; + return 0; } - case SMB_DIRECT_MSG_NEGOTIATE_REQ: { - struct smb_direct_negotiate_req *req = - (struct smb_direct_negotiate_req *)recvmsg->packet; + case SMBDIRECT_EXPECT_NEGOTIATE_REQ: { + struct smbdirect_negotiate_req *req = + (struct smbdirect_negotiate_req *)recvmsg->packet; ksmbd_debug(RDMA, "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", le16_to_cpu(req->min_version), @@ -504,29 +569,34 @@ static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) 128 * 1024) return -ECONNABORTED; - break; + return 0; } - default: - return -EINVAL; + case SMBDIRECT_EXPECT_NEGOTIATE_REP: + /* client only */ + break; } - return 0; + + /* This is an internal error */ + return -EINVAL; } static void recv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smb_direct_recvmsg *recvmsg; - struct smb_direct_transport *t; + struct smbdirect_recv_io *recvmsg; + struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; - recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe); - t = recvmsg->transport; + recvmsg = container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); + sc = recvmsg->socket; + sp = &sc->parameters; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { - put_recvmsg(t, recvmsg); + put_recvmsg(sc, recvmsg); if (wc->status != IB_WC_WR_FLUSH_ERR) { pr_err("Recv error. status='%s (%d)' opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smb_direct_disconnect_rdma_connection(t); + smb_direct_disconnect_rdma_connection(sc); } return; } @@ -538,29 +608,37 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, recvmsg->sge.length, DMA_FROM_DEVICE); - switch (recvmsg->type) { - case SMB_DIRECT_MSG_NEGOTIATE_REQ: - if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { - put_recvmsg(t, recvmsg); - smb_direct_disconnect_rdma_connection(t); + /* + * Reset timer to the keepalive interval in + * order to trigger our next keepalive message. 
+ */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_interval_msec)); + + switch (sc->recv_io.expected) { + case SMBDIRECT_EXPECT_NEGOTIATE_REQ: + if (wc->byte_len < sizeof(struct smbdirect_negotiate_req)) { + put_recvmsg(sc, recvmsg); + smb_direct_disconnect_rdma_connection(sc); return; } - t->negotiation_requested = true; - t->full_packet_received = true; - t->status = SMB_DIRECT_CS_CONNECTED; - enqueue_reassembly(t, recvmsg, 0); - wake_up_interruptible(&t->wait_status); + sc->recv_io.reassembly.full_packet_received = true; + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED); + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; + enqueue_reassembly(sc, recvmsg, 0); + wake_up(&sc->status_wait); return; - case SMB_DIRECT_MSG_DATA_TRANSFER: { - struct smb_direct_data_transfer *data_transfer = - (struct smb_direct_data_transfer *)recvmsg->packet; + case SMBDIRECT_EXPECT_DATA_TRANSFER: { + struct smbdirect_data_transfer *data_transfer = + (struct smbdirect_data_transfer *)recvmsg->packet; u32 remaining_data_length, data_offset, data_length; - int avail_recvmsg_count, receive_credits; + u16 old_recv_credit_target; if (wc->byte_len < - offsetof(struct smb_direct_data_transfer, padding)) { - put_recvmsg(t, recvmsg); - smb_direct_disconnect_rdma_connection(t); + offsetof(struct smbdirect_data_transfer, padding)) { + put_recvmsg(sc, recvmsg); + smb_direct_disconnect_rdma_connection(sc); return; } @@ -569,86 +647,89 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) data_offset = le32_to_cpu(data_transfer->data_offset); if (wc->byte_len < data_offset || wc->byte_len < (u64)data_offset + data_length) { - put_recvmsg(t, recvmsg); - smb_direct_disconnect_rdma_connection(t); + put_recvmsg(sc, recvmsg); + smb_direct_disconnect_rdma_connection(sc); return; } - if (remaining_data_length > t->max_fragmented_recv_size || - data_length > t->max_fragmented_recv_size || + if (remaining_data_length > sp->max_fragmented_recv_size || + data_length > sp->max_fragmented_recv_size || (u64)remaining_data_length + (u64)data_length > - (u64)t->max_fragmented_recv_size) { - put_recvmsg(t, recvmsg); - smb_direct_disconnect_rdma_connection(t); + (u64)sp->max_fragmented_recv_size) { + put_recvmsg(sc, recvmsg); + smb_direct_disconnect_rdma_connection(sc); return; } if (data_length) { - if (t->full_packet_received) + if (sc->recv_io.reassembly.full_packet_received) recvmsg->first_segment = true; if (le32_to_cpu(data_transfer->remaining_data_length)) - t->full_packet_received = false; + sc->recv_io.reassembly.full_packet_received = false; else - t->full_packet_received = true; - - spin_lock(&t->receive_credit_lock); - receive_credits = --(t->recv_credits); - avail_recvmsg_count = t->count_avail_recvmsg; - spin_unlock(&t->receive_credit_lock); - } else { - spin_lock(&t->receive_credit_lock); - receive_credits = --(t->recv_credits); - avail_recvmsg_count = ++(t->count_avail_recvmsg); - spin_unlock(&t->receive_credit_lock); + sc->recv_io.reassembly.full_packet_received = true; } - t->recv_credit_target = + atomic_dec(&sc->recv_io.posted.count); + atomic_dec(&sc->recv_io.credits.count); + + old_recv_credit_target = sc->recv_io.credits.target; + sc->recv_io.credits.target = le16_to_cpu(data_transfer->credits_requested); + sc->recv_io.credits.target = + min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); + sc->recv_io.credits.target = + max_t(u16, sc->recv_io.credits.target, 1); 
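Just above, recv_done() re-reads the peer's credits_requested on every data transfer and bounds the new target with a min_t()/max_t() pair; the lower bound of 1 guarantees the server always grants at least one credit so the peer can keep sending. The same bound could be expressed with clamp_t() from <linux/minmax.h> — a small sketch, with a hypothetical helper name:

	#include <linux/minmax.h>
	#include <linux/types.h>

	/* Bound a peer-requested credit target to [1, configured maximum]. */
	static u16 clamp_credit_target(u16 requested, u16 max_credits)
	{
		return clamp_t(u16, requested, 1, max_credits);
	}

	/*
	 * e.g. (illustrative only):
	 * sc->recv_io.credits.target =
	 *	clamp_credit_target(le16_to_cpu(data_transfer->credits_requested),
	 *			    sp->recv_credit_max);
	 */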
atomic_add(le16_to_cpu(data_transfer->credits_granted), - &t->send_credits); + &sc->send_io.credits.count); if (le16_to_cpu(data_transfer->flags) & - SMB_DIRECT_RESPONSE_REQUESTED) - queue_work(smb_direct_wq, &t->send_immediate_work); + SMBDIRECT_FLAG_RESPONSE_REQUESTED) + queue_work(sc->workqueue, &sc->idle.immediate_work); - if (atomic_read(&t->send_credits) > 0) - wake_up_interruptible(&t->wait_send_credits); - - if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) - queue_work(smb_direct_wq, &t->post_recv_credits_work); + if (atomic_read(&sc->send_io.credits.count) > 0) + wake_up(&sc->send_io.credits.wait_queue); if (data_length) { - enqueue_reassembly(t, recvmsg, (int)data_length); - wake_up_interruptible(&t->wait_reassembly_queue); + if (sc->recv_io.credits.target > old_recv_credit_target) + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); + + enqueue_reassembly(sc, recvmsg, (int)data_length); + wake_up(&sc->recv_io.reassembly.wait_queue); } else - put_recvmsg(t, recvmsg); + put_recvmsg(sc, recvmsg); return; } + case SMBDIRECT_EXPECT_NEGOTIATE_REP: + /* client only */ + break; } /* * This is an internal error! */ - WARN_ON_ONCE(recvmsg->type != SMB_DIRECT_MSG_DATA_TRANSFER); - put_recvmsg(t, recvmsg); - smb_direct_disconnect_rdma_connection(t); + WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); + put_recvmsg(sc, recvmsg); + smb_direct_disconnect_rdma_connection(sc); } -static int smb_direct_post_recv(struct smb_direct_transport *t, - struct smb_direct_recvmsg *recvmsg) +static int smb_direct_post_recv(struct smbdirect_socket *sc, + struct smbdirect_recv_io *recvmsg) { + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_recv_wr wr; int ret; - recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device, - recvmsg->packet, t->max_recv_size, + recvmsg->sge.addr = ib_dma_map_single(sc->ib.dev, + recvmsg->packet, + sp->max_recv_size, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr); + ret = ib_dma_mapping_error(sc->ib.dev, recvmsg->sge.addr); if (ret) return ret; - recvmsg->sge.length = t->max_recv_size; - recvmsg->sge.lkey = t->pd->local_dma_lkey; + recvmsg->sge.length = sp->max_recv_size; + recvmsg->sge.lkey = sc->ib.pd->local_dma_lkey; recvmsg->cqe.done = recv_done; wr.wr_cqe = &recvmsg->cqe; @@ -656,14 +737,14 @@ static int smb_direct_post_recv(struct smb_direct_transport *t, wr.sg_list = &recvmsg->sge; wr.num_sge = 1; - ret = ib_post_recv(t->qp, &wr, NULL); + ret = ib_post_recv(sc->ib.qp, &wr, NULL); if (ret) { pr_err("Can't post recv: %d\n", ret); - ib_dma_unmap_single(t->cm_id->device, + ib_dma_unmap_single(sc->ib.dev, recvmsg->sge.addr, recvmsg->sge.length, DMA_FROM_DEVICE); recvmsg->sge.length = 0; - smb_direct_disconnect_rdma_connection(t); + smb_direct_disconnect_rdma_connection(sc); return ret; } return ret; @@ -672,15 +753,16 @@ static int smb_direct_post_recv(struct smb_direct_transport *t, static int smb_direct_read(struct ksmbd_transport *t, char *buf, unsigned int size, int unused) { - struct smb_direct_recvmsg *recvmsg; - struct smb_direct_data_transfer *data_transfer; + struct smbdirect_recv_io *recvmsg; + struct smbdirect_data_transfer *data_transfer; int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; - struct smb_direct_transport *st = smb_trans_direct_transfort(t); + struct smb_direct_transport *st = SMBD_TRANS(t); + struct smbdirect_socket *sc = &st->socket; again: - if (st->status != SMB_DIRECT_CS_CONNECTED) { + if 
(sc->status != SMBDIRECT_SOCKET_CONNECTED) { pr_err("disconnected\n"); return -ENOTCONN; } @@ -690,9 +772,10 @@ again: * the only one reading from the front of the queue. The transport * may add more entries to the back of the queue at the same time */ - if (st->reassembly_data_length >= size) { + if (sc->recv_io.reassembly.data_length >= size) { int queue_length; int queue_removed = 0; + unsigned long flags; /* * Need to make sure reassembly_data_length is read before @@ -702,13 +785,13 @@ again: * updated in SOFTIRQ as more data is received */ virt_rmb(); - queue_length = st->reassembly_queue_length; + queue_length = sc->recv_io.reassembly.queue_length; data_read = 0; to_read = size; - offset = st->first_entry_offset; + offset = sc->recv_io.reassembly.first_entry_offset; while (data_read < size) { - recvmsg = get_first_reassembly(st); - data_transfer = smb_direct_recvmsg_payload(recvmsg); + recvmsg = get_first_reassembly(sc); + data_transfer = smbdirect_recv_io_payload(recvmsg); data_length = le32_to_cpu(data_transfer->data_length); remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); @@ -748,12 +831,12 @@ again: if (queue_length) { list_del(&recvmsg->list); } else { - spin_lock_irq(&st->reassembly_queue_lock); + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); list_del(&recvmsg->list); - spin_unlock_irq(&st->reassembly_queue_lock); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); } queue_removed++; - put_recvmsg(st, recvmsg); + put_recvmsg(sc, recvmsg); offset = 0; } else { offset += to_copy; @@ -763,33 +846,24 @@ again: data_read += to_copy; } - spin_lock_irq(&st->reassembly_queue_lock); - st->reassembly_data_length -= data_read; - st->reassembly_queue_length -= queue_removed; - spin_unlock_irq(&st->reassembly_queue_lock); - - spin_lock(&st->receive_credit_lock); - st->count_avail_recvmsg += queue_removed; - if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) { - spin_unlock(&st->receive_credit_lock); - queue_work(smb_direct_wq, &st->post_recv_credits_work); - } else { - spin_unlock(&st->receive_credit_lock); - } + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + sc->recv_io.reassembly.data_length -= data_read; + sc->recv_io.reassembly.queue_length -= queue_removed; + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - st->first_entry_offset = offset; + sc->recv_io.reassembly.first_entry_offset = offset; ksmbd_debug(RDMA, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", - data_read, st->reassembly_data_length, - st->first_entry_offset); + data_read, sc->recv_io.reassembly.data_length, + sc->recv_io.reassembly.first_entry_offset); read_rfc1002_done: return data_read; } ksmbd_debug(RDMA, "wait_event on more data\n"); - rc = wait_event_interruptible(st->wait_reassembly_queue, - st->reassembly_data_length >= size || - st->status != SMB_DIRECT_CS_CONNECTED); + rc = wait_event_interruptible(sc->recv_io.reassembly.wait_queue, + sc->recv_io.reassembly.data_length >= size || + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) return -EINTR; @@ -798,56 +872,44 @@ read_rfc1002_done: static void smb_direct_post_recv_credits(struct work_struct *work) { - struct smb_direct_transport *t = container_of(work, - struct smb_direct_transport, post_recv_credits_work); - struct smb_direct_recvmsg *recvmsg; - int receive_credits, credits = 0; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); + struct smbdirect_recv_io 
*recvmsg; + int credits = 0; int ret; - spin_lock(&t->receive_credit_lock); - receive_credits = t->recv_credits; - spin_unlock(&t->receive_credit_lock); - - if (receive_credits < t->recv_credit_target) { + if (atomic_read(&sc->recv_io.credits.count) < sc->recv_io.credits.target) { while (true) { - recvmsg = get_free_recvmsg(t); + recvmsg = get_free_recvmsg(sc); if (!recvmsg) break; - recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; recvmsg->first_segment = false; - ret = smb_direct_post_recv(t, recvmsg); + ret = smb_direct_post_recv(sc, recvmsg); if (ret) { pr_err("Can't post recv: %d\n", ret); - put_recvmsg(t, recvmsg); + put_recvmsg(sc, recvmsg); break; } credits++; + + atomic_inc(&sc->recv_io.posted.count); } } - spin_lock(&t->receive_credit_lock); - t->recv_credits += credits; - t->count_avail_recvmsg -= credits; - spin_unlock(&t->receive_credit_lock); - - spin_lock(&t->lock_new_recv_credits); - t->new_recv_credits += credits; - spin_unlock(&t->lock_new_recv_credits); - if (credits) - queue_work(smb_direct_wq, &t->send_immediate_work); + queue_work(sc->workqueue, &sc->idle.immediate_work); } static void send_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smb_direct_sendmsg *sendmsg, *sibling; - struct smb_direct_transport *t; + struct smbdirect_send_io *sendmsg, *sibling; + struct smbdirect_socket *sc; struct list_head *pos, *prev, *end; - sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe); - t = sendmsg->transport; + sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); + sc = sendmsg->socket; ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, @@ -857,55 +919,78 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) pr_err("Send error. status='%s (%d)', opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smb_direct_disconnect_rdma_connection(t); + smb_direct_disconnect_rdma_connection(sc); } - if (atomic_dec_and_test(&t->send_pending)) - wake_up(&t->wait_send_pending); + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); /* iterate and free the list of messages in reverse. the list's head * is invalid. 
*/ - for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next; + for (pos = &sendmsg->sibling_list, prev = pos->prev, end = sendmsg->sibling_list.next; prev != end; pos = prev, prev = prev->prev) { - sibling = container_of(pos, struct smb_direct_sendmsg, list); - smb_direct_free_sendmsg(t, sibling); + sibling = container_of(pos, struct smbdirect_send_io, sibling_list); + smb_direct_free_sendmsg(sc, sibling); } - sibling = container_of(pos, struct smb_direct_sendmsg, list); - smb_direct_free_sendmsg(t, sibling); + sibling = container_of(pos, struct smbdirect_send_io, sibling_list); + smb_direct_free_sendmsg(sc, sibling); } -static int manage_credits_prior_sending(struct smb_direct_transport *t) +static int manage_credits_prior_sending(struct smbdirect_socket *sc) { int new_credits; - spin_lock(&t->lock_new_recv_credits); - new_credits = t->new_recv_credits; - t->new_recv_credits = 0; - spin_unlock(&t->lock_new_recv_credits); + if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) + return 0; + + new_credits = atomic_read(&sc->recv_io.posted.count); + if (new_credits == 0) + return 0; + + new_credits -= atomic_read(&sc->recv_io.credits.count); + if (new_credits <= 0) + return 0; + atomic_add(new_credits, &sc->recv_io.credits.count); return new_credits; } -static int smb_direct_post_send(struct smb_direct_transport *t, +static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) +{ + struct smbdirect_socket_parameters *sp = &sc->parameters; + + if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; + /* + * Now use the keepalive timeout (instead of keepalive interval) + * in order to wait for a response + */ + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_timeout_msec)); + return 1; + } + return 0; +} + +static int smb_direct_post_send(struct smbdirect_socket *sc, struct ib_send_wr *wr) { int ret; - atomic_inc(&t->send_pending); - ret = ib_post_send(t->qp, wr, NULL); + atomic_inc(&sc->send_io.pending.count); + ret = ib_post_send(sc->ib.qp, wr, NULL); if (ret) { pr_err("failed to post send: %d\n", ret); - if (atomic_dec_and_test(&t->send_pending)) - wake_up(&t->wait_send_pending); - smb_direct_disconnect_rdma_connection(t); + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); + smb_direct_disconnect_rdma_connection(sc); } return ret; } -static void smb_direct_send_ctx_init(struct smb_direct_transport *t, - struct smb_direct_send_ctx *send_ctx, +static void smb_direct_send_ctx_init(struct smbdirect_send_batch *send_ctx, bool need_invalidate_rkey, unsigned int remote_key) { @@ -915,47 +1000,50 @@ static void smb_direct_send_ctx_init(struct smb_direct_transport *t, send_ctx->remote_key = remote_key; } -static int smb_direct_flush_send_list(struct smb_direct_transport *t, - struct smb_direct_send_ctx *send_ctx, +static int smb_direct_flush_send_list(struct smbdirect_socket *sc, + struct smbdirect_send_batch *send_ctx, bool is_last) { - struct smb_direct_sendmsg *first, *last; + struct smbdirect_send_io *first, *last; int ret; if (list_empty(&send_ctx->msg_list)) return 0; first = list_first_entry(&send_ctx->msg_list, - struct smb_direct_sendmsg, - list); + struct smbdirect_send_io, + sibling_list); last = list_last_entry(&send_ctx->msg_list, - struct smb_direct_sendmsg, - list); + struct smbdirect_send_io, + sibling_list); + + if (send_ctx->need_invalidate_rkey) { + first->wr.opcode = IB_WR_SEND_WITH_INV; + 
first->wr.ex.invalidate_rkey = send_ctx->remote_key; + send_ctx->need_invalidate_rkey = false; + send_ctx->remote_key = 0; + } last->wr.send_flags = IB_SEND_SIGNALED; last->wr.wr_cqe = &last->cqe; - if (is_last && send_ctx->need_invalidate_rkey) { - last->wr.opcode = IB_WR_SEND_WITH_INV; - last->wr.ex.invalidate_rkey = send_ctx->remote_key; - } - ret = smb_direct_post_send(t, &first->wr); + ret = smb_direct_post_send(sc, &first->wr); if (!ret) { - smb_direct_send_ctx_init(t, send_ctx, + smb_direct_send_ctx_init(send_ctx, send_ctx->need_invalidate_rkey, send_ctx->remote_key); } else { - atomic_add(send_ctx->wr_cnt, &t->send_credits); - wake_up(&t->wait_send_credits); + atomic_add(send_ctx->wr_cnt, &sc->send_io.credits.count); + wake_up(&sc->send_io.credits.wait_queue); list_for_each_entry_safe(first, last, &send_ctx->msg_list, - list) { - smb_direct_free_sendmsg(t, first); + sibling_list) { + smb_direct_free_sendmsg(sc, first); } } return ret; } -static int wait_for_credits(struct smb_direct_transport *t, +static int wait_for_credits(struct smbdirect_socket *sc, wait_queue_head_t *waitq, atomic_t *total_credits, int needed) { @@ -968,61 +1056,68 @@ static int wait_for_credits(struct smb_direct_transport *t, atomic_add(needed, total_credits); ret = wait_event_interruptible(*waitq, atomic_read(total_credits) >= needed || - t->status != SMB_DIRECT_CS_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (t->status != SMB_DIRECT_CS_CONNECTED) + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return -ENOTCONN; else if (ret < 0) return ret; } while (true); } -static int wait_for_send_credits(struct smb_direct_transport *t, - struct smb_direct_send_ctx *send_ctx) +static int wait_for_send_credits(struct smbdirect_socket *sc, + struct smbdirect_send_batch *send_ctx) { int ret; if (send_ctx && - (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) { - ret = smb_direct_flush_send_list(t, send_ctx, false); + (send_ctx->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { + ret = smb_direct_flush_send_list(sc, send_ctx, false); if (ret) return ret; } - return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1); + return wait_for_credits(sc, &sc->send_io.credits.wait_queue, &sc->send_io.credits.count, 1); } -static int wait_for_rw_credits(struct smb_direct_transport *t, int credits) +static int wait_for_rw_credits(struct smbdirect_socket *sc, int credits) { - return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits); + return wait_for_credits(sc, + &sc->rw_io.credits.wait_queue, + &sc->rw_io.credits.count, + credits); } -static int calc_rw_credits(struct smb_direct_transport *t, +static int calc_rw_credits(struct smbdirect_socket *sc, char *buf, unsigned int len) { return DIV_ROUND_UP(get_buf_page_count(buf, len), - t->pages_per_rw_credit); + sc->rw_io.credits.num_pages); } -static int smb_direct_create_header(struct smb_direct_transport *t, +static int smb_direct_create_header(struct smbdirect_socket *sc, int size, int remaining_data_length, - struct smb_direct_sendmsg **sendmsg_out) + struct smbdirect_send_io **sendmsg_out) { - struct smb_direct_sendmsg *sendmsg; - struct smb_direct_data_transfer *packet; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_send_io *sendmsg; + struct smbdirect_data_transfer *packet; int header_length; int ret; - sendmsg = smb_direct_alloc_sendmsg(t); + sendmsg = smb_direct_alloc_sendmsg(sc); if (IS_ERR(sendmsg)) return PTR_ERR(sendmsg); /* Fill in the packet header */ - packet = (struct 
smb_direct_data_transfer *)sendmsg->packet; - packet->credits_requested = cpu_to_le16(t->send_credit_target); - packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); + packet = (struct smbdirect_data_transfer *)sendmsg->packet; + packet->credits_requested = cpu_to_le16(sp->send_credit_target); + packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc)); packet->flags = 0; + if (manage_keep_alive_before_sending(sc)) + packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); + packet->reserved = 0; if (!size) packet->data_offset = 0; @@ -1041,25 +1136,25 @@ static int smb_direct_create_header(struct smb_direct_transport *t, le32_to_cpu(packet->remaining_data_length)); /* Map the packet to DMA */ - header_length = sizeof(struct smb_direct_data_transfer); + header_length = sizeof(struct smbdirect_data_transfer); /* If this is a packet without payload, don't send padding */ if (!size) header_length = - offsetof(struct smb_direct_data_transfer, padding); + offsetof(struct smbdirect_data_transfer, padding); - sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, + sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev, (void *)packet, header_length, DMA_TO_DEVICE); - ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); + ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); if (ret) { - smb_direct_free_sendmsg(t, sendmsg); + smb_direct_free_sendmsg(sc, sendmsg); return ret; } sendmsg->num_sge = 1; sendmsg->sge[0].length = header_length; - sendmsg->sge[0].lkey = t->pd->local_dma_lkey; + sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey; *sendmsg_out = sendmsg; return 0; @@ -1109,14 +1204,14 @@ static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, return ib_dma_map_sg(device, sg_list, npages, dir); } -static int post_sendmsg(struct smb_direct_transport *t, - struct smb_direct_send_ctx *send_ctx, - struct smb_direct_sendmsg *msg) +static int post_sendmsg(struct smbdirect_socket *sc, + struct smbdirect_send_batch *send_ctx, + struct smbdirect_send_io *msg) { int i; for (i = 0; i < msg->num_sge; i++) - ib_dma_sync_single_for_device(t->cm_id->device, + ib_dma_sync_single_for_device(sc->ib.dev, msg->sge[i].addr, msg->sge[i].length, DMA_TO_DEVICE); @@ -1130,34 +1225,34 @@ static int post_sendmsg(struct smb_direct_transport *t, msg->wr.wr_cqe = NULL; msg->wr.send_flags = 0; if (!list_empty(&send_ctx->msg_list)) { - struct smb_direct_sendmsg *last; + struct smbdirect_send_io *last; last = list_last_entry(&send_ctx->msg_list, - struct smb_direct_sendmsg, - list); + struct smbdirect_send_io, + sibling_list); last->wr.next = &msg->wr; } - list_add_tail(&msg->list, &send_ctx->msg_list); + list_add_tail(&msg->sibling_list, &send_ctx->msg_list); send_ctx->wr_cnt++; return 0; } msg->wr.wr_cqe = &msg->cqe; msg->wr.send_flags = IB_SEND_SIGNALED; - return smb_direct_post_send(t, &msg->wr); + return smb_direct_post_send(sc, &msg->wr); } -static int smb_direct_post_send_data(struct smb_direct_transport *t, - struct smb_direct_send_ctx *send_ctx, +static int smb_direct_post_send_data(struct smbdirect_socket *sc, + struct smbdirect_send_batch *send_ctx, struct kvec *iov, int niov, int remaining_data_length) { int i, j, ret; - struct smb_direct_sendmsg *msg; + struct smbdirect_send_io *msg; int data_length; - struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1]; + struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1]; - ret = wait_for_send_credits(t, send_ctx); + ret = wait_for_send_credits(sc, send_ctx); if (ret) return ret; @@ -1165,10 
+1260,10 @@ static int smb_direct_post_send_data(struct smb_direct_transport *t, for (i = 0; i < niov; i++) data_length += iov[i].iov_len; - ret = smb_direct_create_header(t, data_length, remaining_data_length, + ret = smb_direct_create_header(sc, data_length, remaining_data_length, &msg); if (ret) { - atomic_inc(&t->send_credits); + atomic_inc(&sc->send_io.credits.count); return ret; } @@ -1176,19 +1271,19 @@ static int smb_direct_post_send_data(struct smb_direct_transport *t, struct ib_sge *sge; int sg_cnt; - sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1); - sg_cnt = get_mapped_sg_list(t->cm_id->device, + sg_init_table(sg, SMBDIRECT_SEND_IO_MAX_SGE - 1); + sg_cnt = get_mapped_sg_list(sc->ib.dev, iov[i].iov_base, iov[i].iov_len, - sg, SMB_DIRECT_MAX_SEND_SGES - 1, + sg, SMBDIRECT_SEND_IO_MAX_SGE - 1, DMA_TO_DEVICE); if (sg_cnt <= 0) { pr_err("failed to map buffer\n"); ret = -ENOMEM; goto err; - } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) { + } else if (sg_cnt + msg->num_sge > SMBDIRECT_SEND_IO_MAX_SGE) { pr_err("buffer not fitted into sges\n"); ret = -E2BIG; - ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt, + ib_dma_unmap_sg(sc->ib.dev, sg, sg_cnt, DMA_TO_DEVICE); goto err; } @@ -1197,18 +1292,18 @@ static int smb_direct_post_send_data(struct smb_direct_transport *t, sge = &msg->sge[msg->num_sge]; sge->addr = sg_dma_address(&sg[j]); sge->length = sg_dma_len(&sg[j]); - sge->lkey = t->pd->local_dma_lkey; + sge->lkey = sc->ib.pd->local_dma_lkey; msg->num_sge++; } } - ret = post_sendmsg(t, send_ctx, msg); + ret = post_sendmsg(sc, send_ctx, msg); if (ret) goto err; return 0; err: - smb_direct_free_sendmsg(t, msg); - atomic_inc(&t->send_credits); + smb_direct_free_sendmsg(sc, msg); + atomic_inc(&sc->send_io.credits.count); return ret; } @@ -1216,17 +1311,19 @@ static int smb_direct_writev(struct ksmbd_transport *t, struct kvec *iov, int niovs, int buflen, bool need_invalidate, unsigned int remote_key) { - struct smb_direct_transport *st = smb_trans_direct_transfort(t); + struct smb_direct_transport *st = SMBD_TRANS(t); + struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; size_t remaining_data_length; size_t iov_idx; size_t iov_ofs; - size_t max_iov_size = st->max_send_size - - sizeof(struct smb_direct_data_transfer); + size_t max_iov_size = sp->max_send_size - + sizeof(struct smbdirect_data_transfer); int ret; - struct smb_direct_send_ctx send_ctx; + struct smbdirect_send_batch send_ctx; int error = 0; - if (st->status != SMB_DIRECT_CS_CONNECTED) + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return -ENOTCONN; //FIXME: skip RFC1002 header.. 
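smb_direct_writev() gathers per-iovec send_io entries into a smbdirect_send_batch; smb_direct_flush_send_list() then links their work requests and requests a completion only for the final one, so a single send_done() walks the sibling_list and frees the whole batch. A simplified sketch of that chaining idea — the helper name and array layout are illustrative, not the patch's API:

	#include <linux/errno.h>
	#include <rdma/ib_verbs.h>

	/*
	 * Chain n send WRs and signal only the last: one CQE then covers
	 * the whole batch. Sketch only; error handling kept minimal.
	 */
	static int post_send_batch(struct ib_qp *qp, struct ib_send_wr *wrs,
				   unsigned int n, struct ib_cqe *last_cqe)
	{
		unsigned int i;

		if (n == 0)
			return -EINVAL;

		for (i = 0; i < n; i++) {
			wrs[i].next = (i + 1 < n) ? &wrs[i + 1] : NULL;
			wrs[i].wr_cqe = NULL;
			wrs[i].send_flags = 0;
		}
		wrs[n - 1].wr_cqe = last_cqe;	/* completion for the batch */
		wrs[n - 1].send_flags = IB_SEND_SIGNALED;

		return ib_post_send(qp, wrs, NULL);
	}

Signaling only the tail WR keeps CQE traffic proportional to batches rather than messages, which is why the error path in smb_direct_flush_send_list() has to walk and free every queued send_io itself when ib_post_send() fails.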
@@ -1239,9 +1336,9 @@ static int smb_direct_writev(struct ksmbd_transport *t, remaining_data_length = buflen; ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); - smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key); + smb_direct_send_ctx_init(&send_ctx, need_invalidate, remote_key); while (remaining_data_length) { - struct kvec vecs[SMB_DIRECT_MAX_SEND_SGES - 1]; /* minus smbdirect hdr */ + struct kvec vecs[SMBDIRECT_SEND_IO_MAX_SGE - 1]; /* minus smbdirect hdr */ size_t possible_bytes = max_iov_size; size_t possible_vecs; size_t bytes = 0; @@ -1328,7 +1425,7 @@ static int smb_direct_writev(struct ksmbd_transport *t, remaining_data_length -= bytes; - ret = smb_direct_post_send_data(st, &send_ctx, + ret = smb_direct_post_send_data(sc, &send_ctx, vecs, nvecs, remaining_data_length); if (unlikely(ret)) { @@ -1338,7 +1435,7 @@ static int smb_direct_writev(struct ksmbd_transport *t, } done: - ret = smb_direct_flush_send_list(st, &send_ctx, true); + ret = smb_direct_flush_send_list(sc, &send_ctx, true); if (unlikely(!ret && error)) ret = error; @@ -1349,16 +1446,22 @@ done: * that means all the I/Os have been out and we are good to return */ - wait_event(st->wait_send_pending, - atomic_read(&st->send_pending) == 0); + wait_event(sc->send_io.pending.zero_wait_queue, + atomic_read(&sc->send_io.pending.count) == 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + if (sc->status != SMBDIRECT_SOCKET_CONNECTED && ret == 0) + ret = -ENOTCONN; + return ret; } static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, - struct smb_direct_rdma_rw_msg *msg, + struct smbdirect_rw_io *msg, enum dma_data_direction dir) { - rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, + struct smbdirect_socket *sc = &t->socket; + + rdma_rw_ctx_destroy(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, msg->sgt.sgl, msg->sgt.nents, dir); sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); kfree(msg); @@ -1367,16 +1470,16 @@ static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, enum dma_data_direction dir) { - struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe, - struct smb_direct_rdma_rw_msg, cqe); - struct smb_direct_transport *t = msg->t; + struct smbdirect_rw_io *msg = + container_of(wc->wr_cqe, struct smbdirect_rw_io, cqe); + struct smbdirect_socket *sc = msg->socket; if (wc->status != IB_WC_SUCCESS) { - msg->status = -EIO; + msg->error = -EIO; pr_err("read/write error. 
opcode = %d, status = %s(%d)\n", wc->opcode, ib_wc_status_msg(wc->status), wc->status); if (wc->status != IB_WC_WR_FLUSH_ERR) - smb_direct_disconnect_rdma_connection(t); + smb_direct_disconnect_rdma_connection(sc); } complete(msg->completion); @@ -1394,11 +1497,13 @@ static void write_done(struct ib_cq *cq, struct ib_wc *wc) static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf, int buf_len, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len, bool is_read) { - struct smb_direct_rdma_rw_msg *msg, *next_msg; + struct smbdirect_socket *sc = &t->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_rw_io *msg, *next_msg; int i, ret; DECLARE_COMPLETION_ONSTACK(completion); struct ib_send_wr *first_wr; @@ -1407,10 +1512,10 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, int credits_needed; unsigned int desc_buf_len, desc_num = 0; - if (t->status != SMB_DIRECT_CS_CONNECTED) + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return -ENOTCONN; - if (buf_len > t->max_rdma_rw_size) + if (buf_len > sp->max_read_write_size) return -EINVAL; /* calculate needed credits */ @@ -1430,7 +1535,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, buf_len = 0; } - credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); + credits_needed += calc_rw_credits(sc, desc_buf, desc_buf_len); desc_buf += desc_buf_len; buf_len -= desc_buf_len; desc_num++; @@ -1439,7 +1544,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", str_read_write(is_read), buf_len, credits_needed); - ret = wait_for_rw_credits(t, credits_needed); + ret = wait_for_rw_credits(sc, credits_needed); if (ret < 0) return ret; @@ -1455,7 +1560,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, desc_buf_len = le32_to_cpu(desc[i].length); - msg->t = t; + msg->socket = sc; msg->cqe.done = is_read ? read_done : write_done; msg->completion = &completion; @@ -1477,7 +1582,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, goto out; } - ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, + ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, msg->sgt.sgl, get_buf_page_count(desc_buf, desc_buf_len), 0, @@ -1498,96 +1603,94 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, /* concatenate work requests of rdma_rw_ctxs */ first_wr = NULL; list_for_each_entry_reverse(msg, &msg_list, list) { - first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, + first_wr = rdma_rw_ctx_wrs(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, &msg->cqe, first_wr); } - ret = ib_post_send(t->qp, first_wr, NULL); + ret = ib_post_send(sc->ib.qp, first_wr, NULL); if (ret) { pr_err("failed to post send wr for RDMA R/W: %d\n", ret); goto out; } - msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list); + msg = list_last_entry(&msg_list, struct smbdirect_rw_io, list); wait_for_completion(&completion); - ret = msg->status; + ret = msg->error; out: list_for_each_entry_safe(msg, next_msg, &msg_list, list) { list_del(&msg->list); smb_direct_free_rdma_rw_msg(t, msg, is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); } - atomic_add(credits_needed, &t->rw_credits); - wake_up(&t->wait_rw_credits); + atomic_add(credits_needed, &sc->rw_io.credits.count); + wake_up(&sc->rw_io.credits.wait_queue); return ret; } static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf, unsigned int buflen, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len) { - return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, + return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen, desc, desc_len, false); } static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf, unsigned int buflen, - struct smb2_buffer_desc_v1 *desc, + struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len) { - return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, + return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen, desc, desc_len, true); } static void smb_direct_disconnect(struct ksmbd_transport *t) { - struct smb_direct_transport *st = smb_trans_direct_transfort(t); + struct smb_direct_transport *st = SMBD_TRANS(t); + struct smbdirect_socket *sc = &st->socket; - ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id); + ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", sc->rdma.cm_id); - smb_direct_disconnect_rdma_work(&st->disconnect_work); - wait_event_interruptible(st->wait_status, - st->status == SMB_DIRECT_CS_DISCONNECTED); free_transport(st); } static void smb_direct_shutdown(struct ksmbd_transport *t) { - struct smb_direct_transport *st = smb_trans_direct_transfort(t); + struct smb_direct_transport *st = SMBD_TRANS(t); + struct smbdirect_socket *sc = &st->socket; - ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id); + ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", sc->rdma.cm_id); - smb_direct_disconnect_rdma_work(&st->disconnect_work); + smb_direct_disconnect_rdma_work(&sc->disconnect_work); } static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { - struct smb_direct_transport *t = cm_id->context; + struct smbdirect_socket *sc = cm_id->context; ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n", cm_id, rdma_event_msg(event->event), event->event); switch (event->event) { case RDMA_CM_EVENT_ESTABLISHED: { - t->status = SMB_DIRECT_CS_CONNECTED; - wake_up_interruptible(&t->wait_status); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING); + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; + wake_up(&sc->status_wait); break; } case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: { - ib_drain_qp(t->qp); + ib_drain_qp(sc->ib.qp); - t->status = SMB_DIRECT_CS_DISCONNECTED; - wake_up_interruptible(&t->wait_status); - wake_up_interruptible(&t->wait_reassembly_queue); - wake_up(&t->wait_send_credits); + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + smb_direct_disconnect_rdma_work(&sc->disconnect_work); break; } case RDMA_CM_EVENT_CONNECT_ERROR: { - t->status = SMB_DIRECT_CS_DISCONNECTED; - wake_up_interruptible(&t->wait_status); + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + smb_direct_disconnect_rdma_work(&sc->disconnect_work); break; } default: @@ -1601,38 +1704,41 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, static void smb_direct_qpair_handler(struct ib_event *event, void *context) { - struct smb_direct_transport *t = context; + struct smbdirect_socket *sc = context; ksmbd_debug(RDMA, "Received QP event. 
cm_id=%p, event=%s (%d)\n", - t->cm_id, ib_event_msg(event->event), event->event); + sc->rdma.cm_id, ib_event_msg(event->event), event->event); switch (event->event) { case IB_EVENT_CQ_ERR: case IB_EVENT_QP_FATAL: - smb_direct_disconnect_rdma_connection(t); + smb_direct_disconnect_rdma_connection(sc); break; default: break; } } -static int smb_direct_send_negotiate_response(struct smb_direct_transport *t, +static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, int failed) { - struct smb_direct_sendmsg *sendmsg; - struct smb_direct_negotiate_resp *resp; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_send_io *sendmsg; + struct smbdirect_negotiate_resp *resp; int ret; - sendmsg = smb_direct_alloc_sendmsg(t); + sendmsg = smb_direct_alloc_sendmsg(sc); if (IS_ERR(sendmsg)) return -ENOMEM; - resp = (struct smb_direct_negotiate_resp *)sendmsg->packet; + resp = (struct smbdirect_negotiate_resp *)sendmsg->packet; if (failed) { memset(resp, 0, sizeof(*resp)); - resp->min_version = cpu_to_le16(0x0100); - resp->max_version = cpu_to_le16(0x0100); + resp->min_version = SMB_DIRECT_VERSION_LE; + resp->max_version = SMB_DIRECT_VERSION_LE; resp->status = STATUS_NOT_SUPPORTED; + + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; } else { resp->status = STATUS_SUCCESS; resp->min_version = SMB_DIRECT_VERSION_LE; @@ -1640,57 +1746,65 @@ static int smb_direct_send_negotiate_response(struct smb_direct_transport *t, resp->negotiated_version = SMB_DIRECT_VERSION_LE; resp->reserved = 0; resp->credits_requested = - cpu_to_le16(t->send_credit_target); - resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); - resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size); - resp->preferred_send_size = cpu_to_le32(t->max_send_size); - resp->max_receive_size = cpu_to_le32(t->max_recv_size); + cpu_to_le16(sp->send_credit_target); + resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc)); + resp->max_readwrite_size = cpu_to_le32(sp->max_read_write_size); + resp->preferred_send_size = cpu_to_le32(sp->max_send_size); + resp->max_receive_size = cpu_to_le32(sp->max_recv_size); resp->max_fragmented_size = - cpu_to_le32(t->max_fragmented_recv_size); + cpu_to_le32(sp->max_fragmented_recv_size); + + sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; + sc->status = SMBDIRECT_SOCKET_CONNECTED; } - sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, + sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev, (void *)resp, sizeof(*resp), DMA_TO_DEVICE); - ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); + ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); if (ret) { - smb_direct_free_sendmsg(t, sendmsg); + smb_direct_free_sendmsg(sc, sendmsg); return ret; } sendmsg->num_sge = 1; sendmsg->sge[0].length = sizeof(*resp); - sendmsg->sge[0].lkey = t->pd->local_dma_lkey; + sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey; - ret = post_sendmsg(t, NULL, sendmsg); + ret = post_sendmsg(sc, NULL, sendmsg); if (ret) { - smb_direct_free_sendmsg(t, sendmsg); + smb_direct_free_sendmsg(sc, sendmsg); return ret; } - wait_event(t->wait_send_pending, - atomic_read(&t->send_pending) == 0); + wait_event(sc->send_io.pending.zero_wait_queue, + atomic_read(&sc->send_io.pending.count) == 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + return -ENOTCONN; + return 0; } -static int smb_direct_accept_client(struct smb_direct_transport *t) +static int smb_direct_accept_client(struct smbdirect_socket 
*sc) { + struct smbdirect_socket_parameters *sp = &sc->parameters; struct rdma_conn_param conn_param; - struct ib_port_immutable port_immutable; - u32 ird_ord_hdr[2]; + __be32 ird_ord_hdr[2]; int ret; + /* + * smb_direct_handle_connect_request() + * already negotiated sp->initiator_depth + * and sp->responder_resources + */ memset(&conn_param, 0, sizeof(conn_param)); - conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom, - SMB_DIRECT_CM_INITIATOR_DEPTH); - conn_param.responder_resources = 0; - - t->cm_id->device->ops.get_port_immutable(t->cm_id->device, - t->cm_id->port_num, - &port_immutable); - if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { - ird_ord_hdr[0] = conn_param.responder_resources; - ird_ord_hdr[1] = 1; + conn_param.initiator_depth = sp->initiator_depth; + conn_param.responder_resources = sp->responder_resources; + + if (sc->rdma.legacy_iwarp) { + ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); + ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); conn_param.private_data = ird_ord_hdr; conn_param.private_data_len = sizeof(ird_ord_hdr); } else { @@ -1701,7 +1815,17 @@ static int smb_direct_accept_client(struct smb_direct_transport *t) conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; conn_param.flow_control = 0; - ret = rdma_accept(t->cm_id, &conn_param); + /* + * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING + * so that the timer will cause a disconnect. + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->negotiate_timeout_msec)); + + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; + ret = rdma_accept(sc->rdma.cm_id, &conn_param); if (ret) { pr_err("error at rdma_accept: %d\n", ret); return ret; @@ -1709,57 +1833,60 @@ static int smb_direct_accept_client(struct smb_direct_transport *t) return 0; } -static int smb_direct_prepare_negotiation(struct smb_direct_transport *t) +static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) { + struct smbdirect_recv_io *recvmsg; int ret; - struct smb_direct_recvmsg *recvmsg; - recvmsg = get_free_recvmsg(t); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; + + sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ; + + recvmsg = get_free_recvmsg(sc); if (!recvmsg) return -ENOMEM; - recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ; - ret = smb_direct_post_recv(t, recvmsg); + ret = smb_direct_post_recv(sc, recvmsg); if (ret) { pr_err("Can't post recv: %d\n", ret); goto out_err; } - t->negotiation_requested = false; - ret = smb_direct_accept_client(t); + ret = smb_direct_accept_client(sc); if (ret) { pr_err("Can't accept client\n"); goto out_err; } - smb_direct_post_recv_credits(&t->post_recv_credits_work); + smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); return 0; out_err: - put_recvmsg(t, recvmsg); + put_recvmsg(sc, recvmsg); return ret; } -static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t) +static unsigned int smb_direct_get_max_fr_pages(struct smbdirect_socket *sc) { return min_t(unsigned int, - t->cm_id->device->attrs.max_fast_reg_page_list_len, + sc->ib.dev->attrs.max_fast_reg_page_list_len, 256); } -static int smb_direct_init_params(struct smb_direct_transport *t, +static int smb_direct_init_params(struct smbdirect_socket *sc, struct ib_qp_cap *cap) { - struct ib_device *device = 
t->cm_id->device; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct ib_device *device = sc->ib.dev; int max_send_sges, max_rw_wrs, max_send_wrs; unsigned int max_sge_per_wr, wrs_per_credit; /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, * SMB2 response could be mapped. */ - t->max_send_size = smb_direct_max_send_size; - max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3; - if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) { - pr_err("max_send_size %d is too large\n", t->max_send_size); + max_send_sges = DIV_ROUND_UP(sp->max_send_size, PAGE_SIZE) + 3; + if (max_send_sges > SMBDIRECT_SEND_IO_MAX_SGE) { + pr_err("max_send_size %d is too large\n", sp->max_send_size); return -EINVAL; } @@ -1770,10 +1897,9 @@ static int smb_direct_init_params(struct smb_direct_transport *t, * are needed for MR registration, RDMA R/W, local & remote * MR invalidation. */ - t->max_rdma_rw_size = smb_direct_max_read_write_size; - t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t); - t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size, - (t->pages_per_rw_credit - 1) * + sc->rw_io.credits.num_pages = smb_direct_get_max_fr_pages(sc); + sc->rw_io.credits.max = DIV_ROUND_UP(sp->max_read_write_size, + (sc->rw_io.credits.num_pages - 1) * PAGE_SIZE); max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, @@ -1781,238 +1907,244 @@ static int smb_direct_init_params(struct smb_direct_transport *t, max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, max_send_sges); wrs_per_credit = max_t(unsigned int, 4, - DIV_ROUND_UP(t->pages_per_rw_credit, + DIV_ROUND_UP(sc->rw_io.credits.num_pages, max_sge_per_wr) + 1); - max_rw_wrs = t->max_rw_credits * wrs_per_credit; + max_rw_wrs = sc->rw_io.credits.max * wrs_per_credit; - max_send_wrs = smb_direct_send_credit_target + max_rw_wrs; + max_send_wrs = sp->send_credit_target + max_rw_wrs; if (max_send_wrs > device->attrs.max_cqe || max_send_wrs > device->attrs.max_qp_wr) { pr_err("consider lowering send_credit_target = %d\n", - smb_direct_send_credit_target); + sp->send_credit_target); pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", device->attrs.max_cqe, device->attrs.max_qp_wr); return -EINVAL; } - if (smb_direct_receive_credit_max > device->attrs.max_cqe || - smb_direct_receive_credit_max > device->attrs.max_qp_wr) { + if (sp->recv_credit_max > device->attrs.max_cqe || + sp->recv_credit_max > device->attrs.max_qp_wr) { pr_err("consider lowering receive_credit_max = %d\n", - smb_direct_receive_credit_max); + sp->recv_credit_max); pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", device->attrs.max_cqe, device->attrs.max_qp_wr); return -EINVAL; } - if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) { + if (device->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE) { pr_err("warning: device max_send_sge = %d too small\n", device->attrs.max_send_sge); return -EINVAL; } - if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) { + if (device->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { pr_err("warning: device max_recv_sge = %d too small\n", device->attrs.max_recv_sge); return -EINVAL; } - t->recv_credits = 0; - t->count_avail_recvmsg = 0; - - t->recv_credit_max = smb_direct_receive_credit_max; - t->recv_credit_target = 10; - t->new_recv_credits = 0; + sc->recv_io.credits.target = 1; - t->send_credit_target = smb_direct_send_credit_target; - atomic_set(&t->send_credits, 0); - atomic_set(&t->rw_credits, t->max_rw_credits); - - t->max_send_size = smb_direct_max_send_size; - 
t->max_recv_size = smb_direct_max_receive_size; - t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; + atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max); cap->max_send_wr = max_send_wrs; - cap->max_recv_wr = t->recv_credit_max; - cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES; - cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES; + cap->max_recv_wr = sp->recv_credit_max; + cap->max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + cap->max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; cap->max_inline_data = 0; - cap->max_rdma_ctxs = t->max_rw_credits; + cap->max_rdma_ctxs = sc->rw_io.credits.max; return 0; } -static void smb_direct_destroy_pools(struct smb_direct_transport *t) +static void smb_direct_destroy_pools(struct smbdirect_socket *sc) { - struct smb_direct_recvmsg *recvmsg; + struct smbdirect_recv_io *recvmsg; - while ((recvmsg = get_free_recvmsg(t))) - mempool_free(recvmsg, t->recvmsg_mempool); + while ((recvmsg = get_free_recvmsg(sc))) + mempool_free(recvmsg, sc->recv_io.mem.pool); - mempool_destroy(t->recvmsg_mempool); - t->recvmsg_mempool = NULL; + mempool_destroy(sc->recv_io.mem.pool); + sc->recv_io.mem.pool = NULL; - kmem_cache_destroy(t->recvmsg_cache); - t->recvmsg_cache = NULL; + kmem_cache_destroy(sc->recv_io.mem.cache); + sc->recv_io.mem.cache = NULL; - mempool_destroy(t->sendmsg_mempool); - t->sendmsg_mempool = NULL; + mempool_destroy(sc->send_io.mem.pool); + sc->send_io.mem.pool = NULL; - kmem_cache_destroy(t->sendmsg_cache); - t->sendmsg_cache = NULL; + kmem_cache_destroy(sc->send_io.mem.cache); + sc->send_io.mem.cache = NULL; } -static int smb_direct_create_pools(struct smb_direct_transport *t) +static int smb_direct_create_pools(struct smbdirect_socket *sc) { + struct smbdirect_socket_parameters *sp = &sc->parameters; char name[80]; int i; - struct smb_direct_recvmsg *recvmsg; + struct smbdirect_recv_io *recvmsg; - snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t); - t->sendmsg_cache = kmem_cache_create(name, - sizeof(struct smb_direct_sendmsg) + - sizeof(struct smb_direct_negotiate_resp), + snprintf(name, sizeof(name), "smbdirect_send_io_pool_%p", sc); + sc->send_io.mem.cache = kmem_cache_create(name, + sizeof(struct smbdirect_send_io) + + sizeof(struct smbdirect_negotiate_resp), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!t->sendmsg_cache) + if (!sc->send_io.mem.cache) return -ENOMEM; - t->sendmsg_mempool = mempool_create(t->send_credit_target, + sc->send_io.mem.pool = mempool_create(sp->send_credit_target, mempool_alloc_slab, mempool_free_slab, - t->sendmsg_cache); - if (!t->sendmsg_mempool) + sc->send_io.mem.cache); + if (!sc->send_io.mem.pool) goto err; - snprintf(name, sizeof(name), "smb_direct_resp_%p", t); - t->recvmsg_cache = kmem_cache_create(name, - sizeof(struct smb_direct_recvmsg) + - t->max_recv_size, + snprintf(name, sizeof(name), "smbdirect_recv_io_pool_%p", sc); + sc->recv_io.mem.cache = kmem_cache_create(name, + sizeof(struct smbdirect_recv_io) + + sp->max_recv_size, 0, SLAB_HWCACHE_ALIGN, NULL); - if (!t->recvmsg_cache) + if (!sc->recv_io.mem.cache) goto err; - t->recvmsg_mempool = - mempool_create(t->recv_credit_max, mempool_alloc_slab, - mempool_free_slab, t->recvmsg_cache); - if (!t->recvmsg_mempool) + sc->recv_io.mem.pool = + mempool_create(sp->recv_credit_max, mempool_alloc_slab, + mempool_free_slab, sc->recv_io.mem.cache); + if (!sc->recv_io.mem.pool) goto err; - INIT_LIST_HEAD(&t->recvmsg_queue); - - for (i = 0; i < t->recv_credit_max; i++) { - recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP); + for (i = 0; i < 
sp->recv_credit_max; i++) { + recvmsg = mempool_alloc(sc->recv_io.mem.pool, KSMBD_DEFAULT_GFP); if (!recvmsg) goto err; - recvmsg->transport = t; + recvmsg->socket = sc; recvmsg->sge.length = 0; - list_add(&recvmsg->list, &t->recvmsg_queue); + list_add(&recvmsg->list, &sc->recv_io.free.list); } - t->count_avail_recvmsg = t->recv_credit_max; return 0; err: - smb_direct_destroy_pools(t); + smb_direct_destroy_pools(sc); return -ENOMEM; } -static int smb_direct_create_qpair(struct smb_direct_transport *t, +static int smb_direct_create_qpair(struct smbdirect_socket *sc, struct ib_qp_cap *cap) { + struct smbdirect_socket_parameters *sp = &sc->parameters; int ret; struct ib_qp_init_attr qp_attr; int pages_per_rw; - t->pd = ib_alloc_pd(t->cm_id->device, 0); - if (IS_ERR(t->pd)) { + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); + if (IS_ERR(sc->ib.pd)) { pr_err("Can't create RDMA PD\n"); - ret = PTR_ERR(t->pd); - t->pd = NULL; + ret = PTR_ERR(sc->ib.pd); + sc->ib.pd = NULL; return ret; } - t->send_cq = ib_alloc_cq(t->cm_id->device, t, - smb_direct_send_credit_target + cap->max_rdma_ctxs, - 0, IB_POLL_WORKQUEUE); - if (IS_ERR(t->send_cq)) { + sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, + sp->send_credit_target + + cap->max_rdma_ctxs, + IB_POLL_WORKQUEUE); + if (IS_ERR(sc->ib.send_cq)) { pr_err("Can't create RDMA send CQ\n"); - ret = PTR_ERR(t->send_cq); - t->send_cq = NULL; + ret = PTR_ERR(sc->ib.send_cq); + sc->ib.send_cq = NULL; goto err; } - t->recv_cq = ib_alloc_cq(t->cm_id->device, t, - t->recv_credit_max, 0, IB_POLL_WORKQUEUE); - if (IS_ERR(t->recv_cq)) { + sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, + sp->recv_credit_max, + IB_POLL_WORKQUEUE); + if (IS_ERR(sc->ib.recv_cq)) { pr_err("Can't create RDMA recv CQ\n"); - ret = PTR_ERR(t->recv_cq); - t->recv_cq = NULL; + ret = PTR_ERR(sc->ib.recv_cq); + sc->ib.recv_cq = NULL; goto err; } memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smb_direct_qpair_handler; - qp_attr.qp_context = t; + qp_attr.qp_context = sc; qp_attr.cap = *cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; - qp_attr.send_cq = t->send_cq; - qp_attr.recv_cq = t->recv_cq; + qp_attr.send_cq = sc->ib.send_cq; + qp_attr.recv_cq = sc->ib.recv_cq; qp_attr.port_num = ~0; - ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr); + ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); if (ret) { pr_err("Can't create RDMA QP: %d\n", ret); goto err; } - t->qp = t->cm_id->qp; - t->cm_id->event_handler = smb_direct_cm_handler; + sc->ib.qp = sc->rdma.cm_id->qp; + sc->rdma.cm_id->event_handler = smb_direct_cm_handler; - pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; - if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { - ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, - t->max_rw_credits, IB_MR_TYPE_MEM_REG, - t->pages_per_rw_credit, 0); + pages_per_rw = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE) + 1; + if (pages_per_rw > sc->ib.dev->attrs.max_sgl_rd) { + ret = ib_mr_pool_init(sc->ib.qp, &sc->ib.qp->rdma_mrs, + sc->rw_io.credits.max, IB_MR_TYPE_MEM_REG, + sc->rw_io.credits.num_pages, 0); if (ret) { - pr_err("failed to init mr pool count %d pages %d\n", - t->max_rw_credits, t->pages_per_rw_credit); + pr_err("failed to init mr pool count %zu pages %zu\n", + sc->rw_io.credits.max, sc->rw_io.credits.num_pages); goto err; } } return 0; err: - if (t->qp) { - t->qp = NULL; - rdma_destroy_qp(t->cm_id); + if (sc->ib.qp) { + sc->ib.qp = NULL; + rdma_destroy_qp(sc->rdma.cm_id); } - if (t->recv_cq) { - ib_destroy_cq(t->recv_cq); - t->recv_cq 
= NULL; + if (sc->ib.recv_cq) { + ib_destroy_cq(sc->ib.recv_cq); + sc->ib.recv_cq = NULL; } - if (t->send_cq) { - ib_destroy_cq(t->send_cq); - t->send_cq = NULL; + if (sc->ib.send_cq) { + ib_destroy_cq(sc->ib.send_cq); + sc->ib.send_cq = NULL; } - if (t->pd) { - ib_dealloc_pd(t->pd); - t->pd = NULL; + if (sc->ib.pd) { + ib_dealloc_pd(sc->ib.pd); + sc->ib.pd = NULL; } return ret; } static int smb_direct_prepare(struct ksmbd_transport *t) { - struct smb_direct_transport *st = smb_trans_direct_transfort(t); - struct smb_direct_recvmsg *recvmsg; - struct smb_direct_negotiate_req *req; + struct smb_direct_transport *st = SMBD_TRANS(t); + struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_recv_io *recvmsg; + struct smbdirect_negotiate_req *req; + unsigned long flags; int ret; + /* + * We are waiting to pass the following states: + * + * SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED + * SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING + * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED + * + * To finally get to SMBDIRECT_SOCKET_NEGOTIATE_RUNNING + * in order to continue below. + * + * Everything else is unexpected and an error. + */ ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); - ret = wait_event_interruptible_timeout(st->wait_status, - st->negotiation_requested || - st->status == SMB_DIRECT_CS_DISCONNECTED, - SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ); - if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED) + ret = wait_event_interruptible_timeout(sc->status_wait, + sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED && + sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING && + sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, + msecs_to_jiffies(sp->negotiate_timeout_msec)); + if (ret <= 0 || sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) return ret < 0 ? 
ret : -ETIMEDOUT; - recvmsg = get_first_reassembly(st); + recvmsg = get_first_reassembly(sc); if (!recvmsg) return -ECONNABORTED; @@ -2020,51 +2152,54 @@ static int smb_direct_prepare(struct ksmbd_transport *t) if (ret == -ECONNABORTED) goto out; - req = (struct smb_direct_negotiate_req *)recvmsg->packet; - st->max_recv_size = min_t(int, st->max_recv_size, + req = (struct smbdirect_negotiate_req *)recvmsg->packet; + sp->max_recv_size = min_t(int, sp->max_recv_size, le32_to_cpu(req->preferred_send_size)); - st->max_send_size = min_t(int, st->max_send_size, + sp->max_send_size = min_t(int, sp->max_send_size, le32_to_cpu(req->max_receive_size)); - st->max_fragmented_send_size = + sp->max_fragmented_send_size = le32_to_cpu(req->max_fragmented_size); - st->max_fragmented_recv_size = - (st->recv_credit_max * st->max_recv_size) / 2; + sp->max_fragmented_recv_size = + (sp->recv_credit_max * sp->max_recv_size) / 2; + sc->recv_io.credits.target = le16_to_cpu(req->credits_requested); + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); + sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1); - ret = smb_direct_send_negotiate_response(st, ret); + ret = smb_direct_send_negotiate_response(sc, ret); out: - spin_lock_irq(&st->reassembly_queue_lock); - st->reassembly_queue_length--; + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + sc->recv_io.reassembly.queue_length--; list_del(&recvmsg->list); - spin_unlock_irq(&st->reassembly_queue_lock); - put_recvmsg(st, recvmsg); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + put_recvmsg(sc, recvmsg); return ret; } -static int smb_direct_connect(struct smb_direct_transport *st) +static int smb_direct_connect(struct smbdirect_socket *sc) { - int ret; struct ib_qp_cap qp_cap; + int ret; - ret = smb_direct_init_params(st, &qp_cap); + ret = smb_direct_init_params(sc, &qp_cap); if (ret) { pr_err("Can't configure RDMA parameters\n"); return ret; } - ret = smb_direct_create_pools(st); + ret = smb_direct_create_pools(sc); if (ret) { pr_err("Can't init RDMA pool: %d\n", ret); return ret; } - ret = smb_direct_create_qpair(st, &qp_cap); + ret = smb_direct_create_qpair(sc, &qp_cap); if (ret) { pr_err("Can't accept RDMA client: %d\n", ret); return ret; } - ret = smb_direct_prepare_negotiation(st); + ret = smb_direct_prepare_negotiation(sc); if (ret) { pr_err("Can't negotiate: %d\n", ret); return ret; @@ -2081,10 +2216,15 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs) return true; } -static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) +static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, + struct rdma_cm_event *event) { struct smb_direct_transport *t; + struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; struct task_struct *handler; + u8 peer_initiator_depth; + u8 peer_responder_resources; int ret; if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) { @@ -2097,8 +2237,71 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) t = alloc_transport(new_cm_id); if (!t) return -ENOMEM; + sc = &t->socket; + sp = &sc->parameters; - ret = smb_direct_connect(t); + peer_initiator_depth = event->param.conn.initiator_depth; + peer_responder_resources = event->param.conn.responder_resources; + if (rdma_protocol_iwarp(new_cm_id->device, new_cm_id->port_num) && + event->param.conn.private_data_len == 8) { + /* + * Legacy clients with only iWarp MPA v1 support + * need a private blob in order to negotiate 
+ the IRD/ORD values. + */ + const __be32 *ird_ord_hdr = event->param.conn.private_data; + u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); + u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); + + /* + * cifs.ko sends the legacy IRD/ORD negotiation + * blob even if iWarp MPA v2 was used. + * + * Here we check that the values match and only + * mark the client as legacy if they don't match. + */ + if ((u32)event->param.conn.initiator_depth != ird32 || + (u32)event->param.conn.responder_resources != ord32) { + /* + * There are broken clients (old cifs.ko) + * using little endian, and in addition + * struct rdma_conn_param only uses u8 + * for initiator_depth and responder_resources, + * so we truncate the values to U8_MAX. + * + * smb_direct_accept_client() will then + * do the real negotiation in order to + * select the minimum between client and + * server. + */ + ird32 = min_t(u32, ird32, U8_MAX); + ord32 = min_t(u32, ord32, U8_MAX); + + sc->rdma.legacy_iwarp = true; + peer_initiator_depth = (u8)ird32; + peer_responder_resources = (u8)ord32; + } + } + + /* + * First set what we as the server are able to support + */ + sp->initiator_depth = min_t(u8, sp->initiator_depth, + new_cm_id->device->attrs.max_qp_rd_atom); + + /* + * Negotiate the values by using the minimum + * between client and server if the client provided + * non-zero values. + */ + if (peer_initiator_depth != 0) + sp->initiator_depth = min_t(u8, sp->initiator_depth, + peer_initiator_depth); + if (peer_responder_resources != 0) + sp->responder_resources = min_t(u8, sp->responder_resources, + peer_responder_resources); + + ret = smb_direct_connect(sc); if (ret) goto out_err; @@ -2122,7 +2325,7 @@ static int smb_direct_listen_handler(struct rdma_cm_id *cm_id, { switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: { - int ret = smb_direct_handle_connect_request(cm_id); + int ret = smb_direct_handle_connect_request(cm_id, event); if (ret) { pr_err("Can't create transport: %d\n", ret); diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h index a2291b77488a..3f93c6a9f7e4 100644 --- a/fs/smb/server/transport_rdma.h +++ b/fs/smb/server/transport_rdma.h @@ -11,61 +11,20 @@ #define SMBD_MIN_IOSIZE (512 * 1024) #define SMBD_MAX_IOSIZE (16 * 1024 * 1024) -/* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */ -struct smb_direct_negotiate_req { - __le16 min_version; - __le16 max_version; - __le16 reserved; - __le16 credits_requested; - __le32 preferred_send_size; - __le32 max_receive_size; - __le32 max_fragmented_size; -} __packed; - -/* SMB DIRECT negotiation response packet [MS-SMBD] 2.2.2 */ -struct smb_direct_negotiate_resp { - __le16 min_version; - __le16 max_version; - __le16 negotiated_version; - __le16 reserved; - __le16 credits_requested; - __le16 credits_granted; - __le32 status; - __le32 max_readwrite_size; - __le32 preferred_send_size; - __le32 max_receive_size; - __le32 max_fragmented_size; -} __packed; - -#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001 - -/* SMB DIRECT data transfer packet with payload [MS-SMBD] 2.2.3 */ -struct smb_direct_data_transfer { - __le16 credits_requested; - __le16 credits_granted; - __le16 flags; - __le16 reserved; - __le32 remaining_data_length; - __le32 data_offset; - __le32 data_length; - __le32 padding; - __u8 buffer[]; -} __packed; - #ifdef CONFIG_SMB_SERVER_SMBDIRECT int ksmbd_rdma_init(void); void ksmbd_rdma_stop_listening(void); void ksmbd_rdma_destroy(void); bool ksmbd_rdma_capable_netdev(struct net_device *netdev); void init_smbd_max_io_size(unsigned int sz); -unsigned int 
get_smbd_max_read_write_size(void); +unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt); #else static inline int ksmbd_rdma_init(void) { return 0; } static inline void ksmbd_rdma_stop_listening(void) { } static inline void ksmbd_rdma_destroy(void) { } static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; } static inline void init_smbd_max_io_size(unsigned int sz) { } -static inline unsigned int get_smbd_max_read_write_size(void) { return 0; } +static inline unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { return 0; }
#endif
#endif /* __KSMBD_TRANSPORT_RDMA_H__ */
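
The work-request budgeting in smb_direct_init_params() above packs several DIV_ROUND_UP steps into a few lines. Below is a standalone C sketch of the same arithmetic with made-up device limits so the numbers can be checked outside the kernel; the variable names and sample values are assumptions, not kernel API, and the device's read-SGE bound (elided in the hunk above) is ignored.

/*
 * Standalone sketch (not kernel code): the send work-request budget
 * derived in smb_direct_init_params(), under assumed device limits.
 */
#include <stdio.h>

#define PAGE_SIZE 4096u
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* assumed device attributes and socket parameters */
	unsigned int max_fast_reg_page_list_len = 512;
	unsigned int dev_max_send_sge = 32;
	unsigned int dev_max_cqe = 65536, dev_max_qp_wr = 32768;
	unsigned int max_send_size = 1364;
	unsigned int max_read_write_size = 8 * 1024 * 1024;
	unsigned int send_credit_target = 255;

	/* pages per RW credit, capped at 256 as in the code above */
	unsigned int num_pages = max_fast_reg_page_list_len > 256 ?
				 256 : max_fast_reg_page_list_len;
	/* one page list entry is held back, hence (num_pages - 1) */
	unsigned int rw_credits_max =
		DIV_ROUND_UP(max_read_write_size, (num_pages - 1) * PAGE_SIZE);

	/* +3 for SMB_DIRECT header, SMB2 header and SMB2 response */
	unsigned int max_send_sges =
		DIV_ROUND_UP(max_send_size, PAGE_SIZE) + 3;
	unsigned int max_sge_per_wr =
		dev_max_send_sge > max_send_sges ? dev_max_send_sge
						 : max_send_sges;
	unsigned int wrs_per_credit =
		DIV_ROUND_UP(num_pages, max_sge_per_wr) + 1;
	if (wrs_per_credit < 4)
		wrs_per_credit = 4;

	unsigned int max_send_wrs =
		send_credit_target + rw_credits_max * wrs_per_credit;

	printf("rw_credits=%u wrs_per_credit=%u max_send_wrs=%u fits=%d\n",
	       rw_credits_max, wrs_per_credit, max_send_wrs,
	       max_send_wrs <= dev_max_cqe && max_send_wrs <= dev_max_qp_wr);
	return 0;
}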
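
smb_direct_create_pools() above sizes each receive buffer as a small struct followed by max_recv_size bytes of payload and preallocates recv_credit_max of them. Here is a minimal userspace sketch of that shape, with a plain free list standing in for the kmem_cache and mempool pair; all names in it are illustrative.

/*
 * Standalone sketch (not kernel code): recv_credit_max objects, each
 * a header plus max_recv_size payload bytes, kept on a free list.
 */
#include <stdio.h>
#include <stdlib.h>

struct recv_io {
	struct recv_io *next;   /* free-list link */
	unsigned int length;    /* valid bytes in packet[] */
	unsigned char packet[]; /* max_recv_size bytes follow */
};

static struct recv_io *free_list;

static int fill_pool(unsigned int count, unsigned int max_recv_size)
{
	for (unsigned int i = 0; i < count; i++) {
		struct recv_io *r = malloc(sizeof(*r) + max_recv_size);

		if (!r)
			return -1;
		r->length = 0;
		r->next = free_list;
		free_list = r;
	}
	return 0;
}

static struct recv_io *get_free_recv(void)
{
	struct recv_io *r = free_list;

	if (r)
		free_list = r->next;
	return r;
}

int main(void)
{
	if (fill_pool(255, 8192))
		return 1;
	printf("first free buffer at %p\n", (void *)get_free_recv());
	return 0;
}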
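
smb_direct_prepare() above folds the client's negotiate request into the socket parameters: each direction takes the minimum of the two sides' sizes, and the receive credit target is clamped to [1, recv_credit_max]. A hedged sketch of that folding follows; the request layout is simplified from [MS-SMBD] 2.2.1 and the struct and function names are assumptions.

/*
 * Standalone sketch (not kernel code): parameter folding as done by
 * smb_direct_prepare(), on host-endian copies of the request fields.
 */
#include <stdint.h>
#include <stdio.h>

struct negotiate_req {
	uint16_t credits_requested;
	uint32_t preferred_send_size;
	uint32_t max_receive_size;
	uint32_t max_fragmented_size;
};

struct params {
	uint32_t max_recv_size, max_send_size;
	uint32_t max_fragmented_send_size, max_fragmented_recv_size;
	uint16_t recv_credit_max, recv_credit_target;
};

static void fold_negotiate_req(struct params *p,
			       const struct negotiate_req *req)
{
	/* never receive more than the peer prefers to send, and v.v. */
	if (req->preferred_send_size < p->max_recv_size)
		p->max_recv_size = req->preferred_send_size;
	if (req->max_receive_size < p->max_send_size)
		p->max_send_size = req->max_receive_size;
	p->max_fragmented_send_size = req->max_fragmented_size;
	p->max_fragmented_recv_size =
		(p->recv_credit_max * p->max_recv_size) / 2;

	/* clamp the requested credits to [1, recv_credit_max] */
	p->recv_credit_target = req->credits_requested;
	if (p->recv_credit_target > p->recv_credit_max)
		p->recv_credit_target = p->recv_credit_max;
	if (p->recv_credit_target < 1)
		p->recv_credit_target = 1;
}

int main(void)
{
	struct params p = { .max_recv_size = 8192, .max_send_size = 8192,
			    .recv_credit_max = 255 };
	struct negotiate_req req = { .credits_requested = 512,
				     .preferred_send_size = 1364,
				     .max_receive_size = 8192,
				     .max_fragmented_size = 1048576 };

	fold_negotiate_req(&p, &req);
	printf("recv=%u send=%u frag_recv=%u credit_target=%u\n",
	       (unsigned)p.max_recv_size, (unsigned)p.max_send_size,
	       (unsigned)p.max_fragmented_recv_size,
	       (unsigned)p.recv_credit_target);
	return 0;
}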
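
Finally, the legacy-iWarp branch in smb_direct_handle_connect_request() above parses an 8-byte big-endian private-data blob carrying the peer's IRD/ORD values, truncates them to the u8 range of struct rdma_conn_param, and then takes the minimum against the local limits. A standalone sketch of that decode-and-clamp logic under the same assumptions; the helper and struct names are invented for illustration and only the receive side is shown.

/*
 * Standalone sketch (not kernel code): decode the 8-byte big-endian
 * IRD/ORD blob an iWarp MPA v1 peer sends, clamp to U8_MAX, and take
 * the minimum against the local limits, mirroring the code above.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h> /* ntohl()/htonl() for 32-bit big-endian */

struct conn_limits {
	uint8_t initiator_depth;
	uint8_t responder_resources;
};

static void negotiate_legacy_iwarp(struct conn_limits *local,
				   const void *private_data,
				   size_t private_data_len)
{
	uint32_t be[2], ird, ord;

	if (private_data_len != sizeof(be))
		return; /* no legacy blob, keep the MPA v2 values */

	memcpy(be, private_data, sizeof(be));
	ird = ntohl(be[0]);
	ord = ntohl(be[1]);

	/* rdma_conn_param carries these as u8, so truncate first */
	if (ird > UINT8_MAX)
		ird = UINT8_MAX;
	if (ord > UINT8_MAX)
		ord = UINT8_MAX;

	/* zero means the peer expressed no preference */
	if (ird && ird < local->initiator_depth)
		local->initiator_depth = (uint8_t)ird;
	if (ord && ord < local->responder_resources)
		local->responder_resources = (uint8_t)ord;
}

int main(void)
{
	struct conn_limits local = { .initiator_depth = 16,
				     .responder_resources = 16 };
	uint32_t blob[2] = { htonl(255), htonl(8) };

	negotiate_legacy_iwarp(&local, blob, sizeof(blob));
	printf("ird=%u ord=%u\n", local.initiator_depth,
	       local.responder_resources);
	return 0;
}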
