diff options
Diffstat (limited to 'include/linux/sunrpc/svc_rdma.h')
| -rw-r--r-- | include/linux/sunrpc/svc_rdma.h | 443 |
1 files changed, 228 insertions, 215 deletions
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 0b8e3e6bdacf..57f4fd94166a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * @@ -41,270 +42,282 @@ #ifndef SVC_RDMA_H #define SVC_RDMA_H +#include <linux/llist.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/rpc_rdma.h> +#include <linux/sunrpc/rpc_rdma_cid.h> +#include <linux/sunrpc/svc_rdma_pcl.h> +#include <linux/sunrpc/rdma_rn.h> + +#include <linux/percpu_counter.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> -#define SVCRDMA_DEBUG + +/* Default and maximum inline threshold sizes */ +enum { + RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1, + RPCRDMA_DEF_INLINE_THRESH = 4096, + RPCRDMA_MAX_INLINE_THRESH = 65536 +}; /* RPC/RDMA parameters and stats */ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; +extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; +extern struct workqueue_struct *svcrdma_wq; -extern atomic_t rdma_stat_recv; -extern atomic_t rdma_stat_read; -extern atomic_t rdma_stat_write; -extern atomic_t rdma_stat_sq_starve; -extern atomic_t rdma_stat_rq_starve; -extern atomic_t rdma_stat_rq_poll; -extern atomic_t rdma_stat_rq_prod; -extern atomic_t rdma_stat_sq_poll; -extern atomic_t rdma_stat_sq_prod; - -#define RPCRDMA_VERSION 1 - -/* - * Contexts are built when an RDMA request is created and are a - * record of the resources that can be recovered when the request - * completes. - */ -struct svc_rdma_op_ctxt { - struct svc_rdma_op_ctxt *read_hdr; - struct svc_rdma_fastreg_mr *frmr; - int hdr_count; - struct xdr_buf arg; - struct list_head dto_q; - enum ib_wr_opcode wr_op; - enum ib_wc_status wc_status; - u32 byte_len; - struct svcxprt_rdma *xprt; - unsigned long flags; - enum dma_data_direction direction; - int count; - struct ib_sge sge[RPCSVC_MAXPAGES]; - struct page *pages[RPCSVC_MAXPAGES]; -}; - -/* - * NFS_ requests are mapped on the client side by the chunk lists in - * the RPCRDMA header. During the fetching of the RPC from the client - * and the writing of the reply to the client, the memory in the - * client and the memory in the server must be mapped as contiguous - * vaddr/len for access by the hardware. These data strucures keep - * these mappings. - * - * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the - * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the - * 'ch' field maps the read-list of the RPCRDMA header to the 'sge' - * mapping of the reply. - */ -struct svc_rdma_chunk_sge { - int start; /* sge no for this chunk */ - int count; /* sge count for this chunk */ -}; -struct svc_rdma_fastreg_mr { - struct ib_mr *mr; - void *kva; - struct ib_fast_reg_page_list *page_list; - int page_list_len; - unsigned long access_flags; - unsigned long map_len; - enum dma_data_direction direction; - struct list_head frmr_list; -}; -struct svc_rdma_req_map { - struct svc_rdma_fastreg_mr *frmr; - unsigned long count; - union { - struct kvec sge[RPCSVC_MAXPAGES]; - struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; - }; -}; -#define RDMACTXT_F_FAST_UNREG 1 -#define RDMACTXT_F_LAST_CTXT 2 - -#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ -#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ +extern struct percpu_counter svcrdma_stat_read; +extern struct percpu_counter svcrdma_stat_recv; +extern struct percpu_counter svcrdma_stat_sq_starve; +extern struct percpu_counter svcrdma_stat_write; struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ struct list_head sc_accept_q; /* Conn. waiting accept */ + struct rpcrdma_notification sc_rn; /* removal notification */ int sc_ord; /* RDMA read limit */ - int sc_max_sge; - - int sc_sq_depth; /* Depth of SQ */ - atomic_t sc_sq_count; /* Number of SQ WR on queue */ - - int sc_max_requests; /* Depth of RQ */ + int sc_max_send_sges; + bool sc_snd_w_inv; /* OK to use Send With Invalidate */ + + atomic_t sc_sq_avail; /* SQEs ready to be consumed */ + unsigned int sc_sq_depth; /* Depth of SQ */ + __be32 sc_fc_credits; /* Forward credits */ + u32 sc_max_requests; /* Max requests */ + u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ + u8 sc_port_num; struct ib_pd *sc_pd; - atomic_t sc_dma_used; - atomic_t sc_ctxt_used; + spinlock_t sc_send_lock; + struct llist_head sc_send_ctxts; + spinlock_t sc_rw_ctxt_lock; + struct llist_head sc_rw_ctxts; + + u32 sc_pending_recvs; + u32 sc_recv_batch; struct list_head sc_rq_dto_q; + struct list_head sc_read_complete_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - struct ib_mr *sc_phys_mr; /* MR for server memory */ - u32 sc_dev_caps; /* distilled device caps */ - u32 sc_dma_lkey; /* local dma key */ - unsigned int sc_frmr_pg_list_len; - struct list_head sc_frmr_q; - spinlock_t sc_frmr_q_lock; spinlock_t sc_lock; /* transport lock */ wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ - struct list_head sc_read_complete_q; struct work_struct sc_work; + + struct llist_head sc_recv_ctxts; + + atomic_t sc_completion_ids; }; /* sc_flags */ -#define RDMAXPRT_RQ_PENDING 1 -#define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 -#define RPCRDMA_LISTEN_BACKLOG 10 -/* The default ORD value is based on two outstanding full-size writes with a - * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ -#define RPCRDMA_ORD (64/4) -#define RPCRDMA_SQ_DEPTH_MULT 8 -#define RPCRDMA_MAX_THREADS 16 -#define RPCRDMA_MAX_REQUESTS 16 -#define RPCRDMA_MAX_REQ_SIZE 4096 - -/* svc_rdma_marshal.c */ -extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, - int *, int *); -extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); -extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); -extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, - struct rpcrdma_msg *, - enum rpcrdma_errcode, u32 *); -extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); -extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); -extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, - __be32, __be64, u32); -extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, - struct rpcrdma_msg *, - struct rpcrdma_msg *, - enum rpcrdma_proc); -extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); +static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp) +{ + struct svc_xprt *xprt = rqstp->rq_xprt; -/* svc_rdma_recvfrom.c */ -extern int svc_rdma_recvfrom(struct svc_rqst *); + return container_of(xprt, struct svcxprt_rdma, sc_xprt); +} -/* svc_rdma_sendto.c */ -extern int svc_rdma_sendto(struct svc_rqst *); +/* + * Default connection parameters + */ +enum { + RPCRDMA_LISTEN_BACKLOG = 10, + RPCRDMA_MAX_REQUESTS = 128, + RPCRDMA_MAX_BC_REQUESTS = 2, +}; -/* svc_rdma_transport.c */ -extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); -extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, - enum rpcrdma_errcode); -struct page *svc_rdma_get_page(void); -extern int svc_rdma_post_recv(struct svcxprt_rdma *); -extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); -extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); -extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); -extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); -extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); -extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); -extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); -extern void svc_rdma_put_frmr(struct svcxprt_rdma *, - struct svc_rdma_fastreg_mr *); -extern void svc_sq_reap(struct svcxprt_rdma *); -extern void svc_rq_reap(struct svcxprt_rdma *); -extern struct svc_xprt_class svc_rdma_class; -extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); +#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD -/* svc_rdma.c */ -extern int svc_rdma_init(void); -extern void svc_rdma_cleanup(void); - -/* - * Returns the address of the first read chunk or <nul> if no read chunk is - * present +/** + * svc_rdma_send_cid_init - Initialize a Receive Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize */ -static inline struct rpcrdma_read_chunk * -svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) { - struct rpcrdma_read_chunk *ch = - (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - - if (ch->rc_discrim == 0) - return NULL; + cid->ci_queue_id = rdma->sc_rq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} - return ch; +/** + * svc_rdma_send_cid_init - Initialize a Send Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize + */ +static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_sq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); } /* - * Returns the address of the first read write array element or <nul> if no - * write array list is present + * A chunk context tracks all I/O for moving one Read or Write + * chunk. This is a set of rdma_rw's that handle data movement + * for all segments of one chunk. */ -static inline struct rpcrdma_write_array * -svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) -{ - if (rmsgp->rm_body.rm_chunks[0] != 0 - || rmsgp->rm_body.rm_chunks[1] == 0) - return NULL; +struct svc_rdma_chunk_ctxt { + struct rpc_rdma_cid cc_cid; + struct ib_cqe cc_cqe; + struct list_head cc_rwctxts; + ktime_t cc_posttime; + int cc_sqecount; +}; - return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; -} +struct svc_rdma_recv_ctxt { + struct llist_node rc_node; + struct list_head rc_list; + struct ib_recv_wr rc_recv_wr; + struct ib_cqe rc_cqe; + struct rpc_rdma_cid rc_cid; + struct ib_sge rc_recv_sge; + void *rc_recv_buf; + struct xdr_stream rc_stream; + u32 rc_byte_len; + u32 rc_inv_rkey; + __be32 rc_msgtype; + + /* State for pulling a Read chunk */ + unsigned int rc_pageoff; + unsigned int rc_curpage; + unsigned int rc_readbytes; + struct xdr_buf rc_saved_arg; + struct svc_rdma_chunk_ctxt rc_cc; + + struct svc_rdma_pcl rc_call_pcl; + + struct svc_rdma_pcl rc_read_pcl; + struct svc_rdma_chunk *rc_cur_result_payload; + struct svc_rdma_pcl rc_write_pcl; + struct svc_rdma_pcl rc_reply_pcl; + + unsigned int rc_page_count; + unsigned long rc_maxpages; + struct page *rc_pages[] __counted_by(rc_maxpages); +}; /* - * Returns the address of the first reply array element or <nul> if no - * reply array is present + * State for sending a Write chunk. + * - Tracks progress of writing one chunk over all its segments + * - Stores arguments for the SGL constructor functions */ -static inline struct rpcrdma_write_array * -svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) -{ - struct rpcrdma_read_chunk *rch; - struct rpcrdma_write_array *wr_ary; - struct rpcrdma_write_array *rp_ary; - - /* XXX: Need to fix when reply list may occur with read-list and/or - * write list */ - if (rmsgp->rm_body.rm_chunks[0] != 0 || - rmsgp->rm_body.rm_chunks[1] != 0) - return NULL; - - rch = svc_rdma_get_read_chunk(rmsgp); - if (rch) { - while (rch->rc_discrim) - rch++; - - /* The reply list follows an empty write array located - * at 'rc_position' here. The reply array is at rc_target. - */ - rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; - - goto found_it; - } - - wr_ary = svc_rdma_get_write_array(rmsgp); - if (wr_ary) { - rp_ary = (struct rpcrdma_write_array *) - &wr_ary-> - wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length; - - goto found_it; - } - - /* No read list, no write list */ - rp_ary = (struct rpcrdma_write_array *) - &rmsgp->rm_body.rm_chunks[2]; - - found_it: - if (rp_ary->wc_discrim == 0) - return NULL; - - return rp_ary; -} +struct svc_rdma_write_info { + struct svcxprt_rdma *wi_rdma; + + const struct svc_rdma_chunk *wi_chunk; + + /* write state of this chunk */ + unsigned int wi_seg_off; + unsigned int wi_seg_no; + + /* SGL constructor arguments */ + const struct xdr_buf *wi_xdr; + unsigned char *wi_base; + unsigned int wi_next_off; + + struct svc_rdma_chunk_ctxt wi_cc; + struct work_struct wi_work; +}; + +struct svc_rdma_send_ctxt { + struct llist_node sc_node; + struct rpc_rdma_cid sc_cid; + struct work_struct sc_work; + + struct svcxprt_rdma *sc_rdma; + struct ib_send_wr sc_send_wr; + struct ib_send_wr *sc_wr_chain; + int sc_sqecount; + struct ib_cqe sc_cqe; + struct xdr_buf sc_hdrbuf; + struct xdr_stream sc_stream; + struct svc_rdma_write_info sc_reply_info; + void *sc_xprt_buf; + int sc_page_count; + int sc_cur_sge_no; + unsigned long sc_maxpages; + struct page **sc_pages; + struct ib_sge sc_sges[]; +}; + +/* svc_rdma_backchannel.c */ +extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt); + +/* svc_rdma_recvfrom.c */ +extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); +extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); +extern struct svc_rdma_recv_ctxt * + svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma); +extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, + struct svc_rdma_recv_ctxt *ctxt); +extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); +extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt); +extern int svc_rdma_recvfrom(struct svc_rqst *); + +/* svc_rdma_rw.c */ +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); +extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); +extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc, + enum dma_data_direction dir); +extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head); + +/* svc_rdma_sendto.c */ +extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); +extern struct svc_rdma_send_ctxt * + svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); +extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_post_send(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + const struct xdr_buf *xdr); +extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + struct svc_rdma_recv_ctxt *rctxt, + int status); +extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail); +extern int svc_rdma_sendto(struct svc_rqst *); +extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length); + +/* svc_rdma_transport.c */ +extern struct svc_xprt_class svc_rdma_class; +#ifdef CONFIG_SUNRPC_BACKCHANNEL +extern struct svc_xprt_class svc_rdma_bc_class; +#endif + +/* svc_rdma.c */ +extern int svc_rdma_init(void); +extern void svc_rdma_cleanup(void); + #endif |
