From 2b477c00f3bd87c3286f5940cb4174d8b01ee0d5 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Thu, 22 Dec 2016 12:38:06 -0500 Subject: svcrpc: free contexts immediately on PROC_DESTROY We currently handle a client PROC_DESTROY request by turning it CACHE_NEGATIVE, setting the expired time to now, and then waiting for cache_clean to clean it up later. Since we forgot to set the cache's nextcheck value, that could take up to 30 minutes. Also, though there's probably no real bug in this case, setting CACHE_NEGATIVE directly like this probably isn't a great idea in general. So let's just remove the entry from the cache directly, and move this bit of cache manipulation to a helper function. Signed-off-by: Neil Brown Reported-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..9dcf2c8fe1c5 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -227,6 +227,7 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); +extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); /* Must store cache_detail in seq_file->private if using next three functions */ extern void *cache_seq_start(struct seq_file *file, loff_t *pos); -- cgit From cbaf58032efca401834518b905f528ac912449e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:15 -0500 Subject: svcrdma: Another sendto chunk list parsing update Commit 5fdca6531434 ("svcrdma: Renovate sendto chunk list parsing") missed a spot. svc_rdma_xdr_get_reply_hdr_len() also assumes the Write list has only one Write chunk. There's no harm in making this code more general. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/rpc_rdma.h | 9 +++++++++ include/linux/sunrpc/svc_rdma.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index cfda6adcf33c..245fc59b7324 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -109,6 +109,15 @@ struct rpcrdma_msg { } rm_body; }; +/* + * XDR sizes, in quads + */ +enum { + rpcrdma_fixed_maxsz = 4, + rpcrdma_segment_maxsz = 4, + rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz, +}; + /* * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 757fb963696c..551c51816352 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -218,7 +218,7 @@ extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, struct rpcrdma_msg *, struct rpcrdma_msg *, enum rpcrdma_proc); -extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); +extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); -- cgit From 98fc21d3bfd55a36ce9eb7b32d1ce146f0d1696d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:23 -0500 Subject: svcrdma: Clean up RPC-over-RDMA Reply header encoder Replace C structure-based XDR decoding with pointer arithmetic. Pointer arithmetic is considered more portable, and is used throughout the kernel's existing XDR encoders. The gcc optimizer generates similar assembler code either way. Byte-swapping before a memory store on x86 typically results in an instruction pipeline stall. Avoid byte-swapping when encoding a new header. svcrdma currently doesn't alter a connection's credit grant value after the connection has been accepted, so it is effectively a constant. Cache the byte-swapped value in a separate field. Christoph suggested pulling the header encoding logic into the only function that uses it. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 551c51816352..25ecf92cae96 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -141,7 +141,8 @@ struct svcxprt_rdma { atomic_t sc_sq_avail; /* SQEs ready to be consumed */ unsigned int sc_sq_depth; /* Depth of SQ */ unsigned int sc_rq_depth; /* Depth of RQ */ - u32 sc_max_requests; /* Forward credits */ + __be32 sc_fc_credits; /* Forward credits */ + u32 sc_max_requests; /* Max requests */ u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ @@ -214,10 +215,6 @@ extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, __be32, __be64, u32); -extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, - struct rpcrdma_msg *, - struct rpcrdma_msg *, - enum rpcrdma_proc); extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp); /* svc_rdma_recvfrom.c */ -- cgit From aba7d14ba18c93a2ab37d50b057a885964ef285c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:48 -0500 Subject: svcrdma: Remove unused sc_dto_q field Clean up. Commit be99bb11400c ("svcrdma: Use new CQ API for RPC-over-RDMA server send CQs") removed code that used the sc_dto_q field, but neglected to remove sc_dto_q at the same time. Fixes: be99bb11400c ("svcrdma: Use new CQ API for RPC-over- ...") Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 25ecf92cae96..f77a7bc1612c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,7 +172,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ struct list_head sc_read_complete_q; struct work_struct sc_work; }; -- cgit From a3ab867fa64f9aedb3b01d570db5b43d2fc355fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:56 -0500 Subject: svcrdma: Combine list fields in struct svc_rdma_op_ctxt Clean up: The free list and the dto_q list fields are never used at the same time. Reduce the size of struct svc_rdma_op_ctxt by combining these fields. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f77a7bc1612c..b105f73e3ca2 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -70,7 +70,7 @@ extern atomic_t rdma_stat_sq_prod; * completes. */ struct svc_rdma_op_ctxt { - struct list_head free; + struct list_head list; struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; int hdr_count; @@ -78,7 +78,6 @@ struct svc_rdma_op_ctxt { struct ib_cqe cqe; struct ib_cqe reg_cqe; struct ib_cqe inv_cqe; - struct list_head dto_q; u32 byte_len; u32 position; struct svcxprt_rdma *xprt; -- cgit From d6fc8821c2d2aba4cc18447a467f543e46e7367d Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 8 Feb 2017 09:54:42 +0800 Subject: SUNRPC/Cache: Always treat the invalid cache as unexpired When the first time pynfs runs after rpc/nfsd startup, always get the warning, "Got error: Connection closed" I found the problem is caused by, 1. A new startup of nfsd, rpc.mountd, etc, 2. A rpc request from client (pynfs test, or normal mounting), 3. An ip_map cache is created but invalid, so upcall to rpc.mountd, 4. rpc.mountd process the ip_map upcall, before write the valid data to nfsd, do auth_reload(), and check_useipaddr(), 5. For the first time, old_use_ipaddr = -1, it causes rpc.mountd do write_flush that doing cache_clean, 6. The ip_map cache will be treat as expired and clean, 7. When rpc.mountd write the valid data to nfsd, a new ip_map is created and updated, the cache_check of old ip_map(doing the upcall) will return -ETIMEDOUT. 8. RPC layer return SVC_CLOSE and close the xprt after commit 4d712ef1db05 "svcauth_gss: Close connection when dropping an incoming message" NeilBrown suggest in another email, "If CACHE_VALID is not set, then there is no data in the cache item, so there is nothing to expire. So it would be nice if cache items that don't have CACHE_VALID are never treated as expired." v3, change the order of the two patches v2, change the checking of CACHE_PENDING to CACHE_VALID Reviewed-by: NeilBrown Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 9dcf2c8fe1c5..70738504d647 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -204,8 +204,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) +static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) { + if (!test_bit(CACHE_VALID, &h->flags)) + return false; + return (h->expiry_time < seconds_since_boot()) || (detail->flush_time >= h->last_refresh); } -- cgit From 05a45a2db42543c5f1a32e08f545aebbd7cb4790 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:22 -0500 Subject: sunrpc: turn bitfield flags in svc_version into bools It's just simpler to read this way, IMO. Also, no need to explicitly set vs_hidden to false in the nfsacl ones. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7321ae933867..96467c95f02e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -400,10 +400,11 @@ struct svc_version { struct svc_procedure * vs_proc; /* per-procedure info */ u32 vs_xdrsize; /* xdrsize needed for this version */ - unsigned int vs_hidden : 1, /* Don't register with portmapper. - * Only used for nfsacl so far. */ - vs_rpcb_optnl:1;/* Don't care the result of register. - * Only used for nfsv4. */ + /* Don't register with rpcbind */ + bool vs_hidden; + + /* Don't care if the rpcbind registration fails */ + bool vs_rpcb_optnl; /* Override dispatch function (e.g. when caching replies). * A return value of 0 means drop the request. -- cgit From 362142b25843fb059941aaa01b91501d5d8652cc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:23 -0500 Subject: sunrpc: flag transports as having congestion control NFSv4 requires a transport protocol with congestion control in most cases. On an IP network, that means that NFSv4 over UDP should be forbidden. The situation with RDMA is a bit more nuanced, but most RDMA transports are suitable for this. For now, we assume that all RDMA transports are suitable, but we may need to revise that at some point. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 7440290f64ac..ddb7f94a9d06 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -67,6 +67,7 @@ struct svc_xprt { #define XPT_CACHE_AUTH 11 /* cache auth info */ #define XPT_LOCAL 12 /* connection from loopback interface */ #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ +#define XPT_CONG_CTRL 14 /* has congestion control */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ -- cgit From 5283b03ee5cd28d516646298bead09b238d92ddc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:24 -0500 Subject: nfs/nfsd/sunrpc: enforce transport requirements for NFSv4 NFSv4 requires a transport "that is specified to avoid network congestion" (RFC 7530, section 3.1, paragraph 2). In practical terms, that means that you should not run NFSv4 over UDP. The server has never enforced that requirement, however. This patchset fixes this by adding a new flag to the svc_version that states that it has these transport requirements. With that, we can check that the transport has XPT_CONG_CTRL set before processing an RPC. If it doesn't we reject it with RPC_PROG_MISMATCH. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 96467c95f02e..e770abeed32d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -406,6 +406,9 @@ struct svc_version { /* Don't care if the rpcbind registration fails */ bool vs_rpcb_optnl; + /* Need xprt with congestion control */ + bool vs_need_cong_ctrl; + /* Override dispatch function (e.g. when caching replies). * A return value of 0 means drop the request. * vs_dispatch == NULL means use default dispatcher. -- cgit