From ae67bd3821bb0a54d97e7883d211196637d487a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:44 -0400 Subject: SUNRPC: Fix up task signalling The RPC_TASK_KILLED flag should really not be set from another context because it can clobber data in the struct task when task->tk_flags is changed non-atomically. Let's therefore swap out RPC_TASK_KILLED with an atomic flag, and add a function to set that flag and safely wake up the task. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 14 ++------------ net/sunrpc/sched.c | 28 +++++++++++++++++++++++----- net/sunrpc/xprt.c | 4 ++++ 3 files changed, 29 insertions(+), 17 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8ff11dc98d7f..18f5392aa550 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -827,14 +827,8 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) * Spin lock all_tasks to prevent changes... */ spin_lock(&clnt->cl_lock); - list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { - if (!RPC_IS_ACTIVATED(rovr)) - continue; - if (!(rovr->tk_flags & RPC_TASK_KILLED)) { - rovr->tk_flags |= RPC_TASK_KILLED; - rpc_exit(rovr, -EIO); - } - } + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) + rpc_signal_task(rovr); spin_unlock(&clnt->cl_lock); } EXPORT_SYMBOL_GPL(rpc_killall_tasks); @@ -1477,8 +1471,6 @@ EXPORT_SYMBOL_GPL(rpc_force_rebind); int rpc_restart_call_prepare(struct rpc_task *task) { - if (RPC_ASSASSINATED(task)) - return 0; task->tk_action = call_start; task->tk_status = 0; if (task->tk_ops->rpc_call_prepare != NULL) @@ -1494,8 +1486,6 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); int rpc_restart_call(struct rpc_task *task) { - if (RPC_ASSASSINATED(task)) - return 0; task->tk_action = call_start; task->tk_status = 0; return 1; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 28956c70100a..3d6cb91ba598 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -759,8 +759,7 @@ static 
void rpc_reset_task_statistics(struct rpc_task *task) { task->tk_timeouts = 0; - task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT); - + task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT); rpc_init_task_statistics(task); } @@ -773,7 +772,6 @@ void rpc_exit_task(struct rpc_task *task) if (task->tk_ops->rpc_call_done != NULL) { task->tk_ops->rpc_call_done(task, task->tk_calldata); if (task->tk_action != NULL) { - WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ xprt_release(task); rpc_reset_task_statistics(task); @@ -781,6 +779,19 @@ void rpc_exit_task(struct rpc_task *task) } } +void rpc_signal_task(struct rpc_task *task) +{ + struct rpc_wait_queue *queue; + + if (!RPC_IS_ACTIVATED(task)) + return; + set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); + smp_mb__after_atomic(); + queue = READ_ONCE(task->tk_waitqueue); + if (queue) + rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS); +} + void rpc_exit(struct rpc_task *task, int status) { task->tk_status = status; @@ -836,6 +847,13 @@ static void __rpc_execute(struct rpc_task *task) */ if (!RPC_IS_QUEUED(task)) continue; + + /* + * Signalled tasks should exit rather than sleep. + */ + if (RPC_SIGNALLED(task)) + rpc_exit(task, -ERESTARTSYS); + /* * The queue->lock protects against races with * rpc_make_runnable(). @@ -861,7 +879,7 @@ static void __rpc_execute(struct rpc_task *task) status = out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_QUEUED, rpc_wait_bit_killable, TASK_KILLABLE); - if (status == -ERESTARTSYS) { + if (status < 0) { /* * When a sync task receives a signal, it exits with * -ERESTARTSYS. In order to catch any callbacks that @@ -869,7 +887,7 @@ static void __rpc_execute(struct rpc_task *task) * break the loop here, but go around once more. 
*/ dprintk("RPC: %5u got signal\n", task->tk_pid); - task->tk_flags |= RPC_TASK_KILLED; + set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); rpc_exit(task, -ERESTARTSYS); } dprintk("RPC: %5u sync task resuming\n", task->tk_pid); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d7117d241460..3a4156cb0134 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1337,6 +1337,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) if (status < 0) goto out_dequeue; } + if (RPC_SIGNALLED(task)) { + status = -ERESTARTSYS; + goto out_dequeue; + } } /* -- cgit From 9e6fa0bb84beeff4dddb17d7c23e35135fe977c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:45 -0400 Subject: SUNRPC: Refactor rpc_restart_call/rpc_restart_call_prepare Clean up. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 18f5392aa550..af1dfc2a8fb1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1464,20 +1464,13 @@ void rpc_force_rebind(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_force_rebind); -/* - * Restart an (async) RPC call from the call_prepare state. - * Usually called from within the exit handler. - */ -int -rpc_restart_call_prepare(struct rpc_task *task) +static int +__rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *)) { - task->tk_action = call_start; task->tk_status = 0; - if (task->tk_ops->rpc_call_prepare != NULL) - task->tk_action = rpc_prepare_task; + task->tk_action = action; return 1; } -EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); /* * Restart an (async) RPC call. 
Usually called from within the @@ -1486,12 +1479,23 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); int rpc_restart_call(struct rpc_task *task) { - task->tk_action = call_start; - task->tk_status = 0; - return 1; + return __rpc_restart_call(task, call_start); } EXPORT_SYMBOL_GPL(rpc_restart_call); +/* + * Restart an (async) RPC call from the call_prepare state. + * Usually called from within the exit handler. + */ +int +rpc_restart_call_prepare(struct rpc_task *task) +{ + if (task->tk_ops->rpc_call_prepare != NULL) + return __rpc_restart_call(task, rpc_prepare_task); + return rpc_restart_call(task); +} +EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); + const char *rpc_proc_name(const struct rpc_task *task) { -- cgit From 8ba6a92d0182091e0c2fa15c1a5b5458bac25fc3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:46 -0400 Subject: SUNRPC: Refactor xprt_request_wait_receive() Convert the transport callback to actually put the request to sleep instead of just setting a timeout. This is in preparation for rpc_sleep_on_timeout(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 79 ++++++++++++++++-------------- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtsock.c | 8 +-- 4 files changed, 48 insertions(+), 43 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3a4156cb0134..5afffa669d04 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -554,41 +554,6 @@ bool xprt_write_space(struct rpc_xprt *xprt) } EXPORT_SYMBOL_GPL(xprt_write_space); -/** - * xprt_set_retrans_timeout_def - set a request's retransmit timeout - * @task: task whose timeout is to be set - * - * Set a request's retransmit timeout based on the transport's - * default timeout parameters. Used by transports that don't adjust - * the retransmit timeout based on round-trip time estimation. 
- */ -void xprt_set_retrans_timeout_def(struct rpc_task *task) -{ - task->tk_timeout = task->tk_rqstp->rq_timeout; -} -EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); - -/** - * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout - * @task: task whose timeout is to be set - * - * Set a request's retransmit timeout using the RTT estimator. - */ -void xprt_set_retrans_timeout_rtt(struct rpc_task *task) -{ - int timer = task->tk_msg.rpc_proc->p_timer; - struct rpc_clnt *clnt = task->tk_client; - struct rpc_rtt *rtt = clnt->cl_rtt; - struct rpc_rqst *req = task->tk_rqstp; - unsigned long max_timeout = clnt->cl_timeout->to_maxval; - - task->tk_timeout = rpc_calc_rto(rtt, timer); - task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; - if (task->tk_timeout > max_timeout || task->tk_timeout == 0) - task->tk_timeout = max_timeout; -} -EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); - static void xprt_reset_majortimeo(struct rpc_rqst *req) { const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; @@ -1102,6 +1067,47 @@ static void xprt_timer(struct rpc_task *task) task->tk_status = 0; } +/** + * xprt_wait_for_reply_request_def - wait for reply + * @task: pointer to rpc_task + * + * Set a request's retransmit timeout based on the transport's + * default timeout parameters. Used by transports that don't adjust + * the retransmit timeout based on round-trip time estimation, + * and put the task to sleep on the pending queue. + */ +void xprt_wait_for_reply_request_def(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + task->tk_timeout = req->rq_timeout; + rpc_sleep_on(&req->rq_xprt->pending, task, xprt_timer); +} +EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def); + +/** + * xprt_wait_for_reply_request_rtt - wait for reply using RTT estimator + * @task: pointer to rpc_task + * + * Set a request's retransmit timeout using the RTT estimator, + * and put the task to sleep on the pending queue. 
+ */ +void xprt_wait_for_reply_request_rtt(struct rpc_task *task) +{ + int timer = task->tk_msg.rpc_proc->p_timer; + struct rpc_clnt *clnt = task->tk_client; + struct rpc_rtt *rtt = clnt->cl_rtt; + struct rpc_rqst *req = task->tk_rqstp; + unsigned long max_timeout = clnt->cl_timeout->to_maxval; + + task->tk_timeout = rpc_calc_rto(rtt, timer); + task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; + if (task->tk_timeout > max_timeout || task->tk_timeout == 0) + task->tk_timeout = max_timeout; + rpc_sleep_on(&req->rq_xprt->pending, task, xprt_timer); +} +EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt); + /** * xprt_request_wait_receive - wait for the reply to an RPC request * @task: RPC task about to send a request @@ -1121,8 +1127,7 @@ void xprt_request_wait_receive(struct rpc_task *task) */ spin_lock(&xprt->queue_lock); if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { - xprt->ops->set_retrans_timeout(task); - rpc_sleep_on(&xprt->pending, task, xprt_timer); + xprt->ops->wait_for_reply_request(task); /* * Send an extra queue wakeup call if the * connection was dropped in case the call to diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 907464c2a9f0..bed57d8b5c19 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -261,7 +261,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = { .buf_alloc = xprt_rdma_bc_allocate, .buf_free = xprt_rdma_bc_free, .send_request = xprt_rdma_bc_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_def, + .wait_for_reply_request = xprt_wait_for_reply_request_def, .close = xprt_rdma_bc_close, .destroy = xprt_rdma_bc_put, .print_stats = xprt_rdma_print_stats diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 5d261353bd90..7e73abe01cfe 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -815,7 +815,7 @@ static const struct 
rpc_xprt_ops xprt_rdma_procs = { .alloc_slot = xprt_rdma_alloc_slot, .free_slot = xprt_rdma_free_slot, .release_request = xprt_release_rqst_cong, /* ditto */ - .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ + .wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */ .timer = xprt_rdma_timer, .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ .set_port = xprt_rdma_set_port, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 732d4b57411a..b4b4b8db143c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2690,7 +2690,7 @@ static const struct rpc_xprt_ops xs_local_ops = { .buf_free = rpc_free, .prepare_request = xs_stream_prepare_request, .send_request = xs_local_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_def, + .wait_for_reply_request = xprt_wait_for_reply_request_def, .close = xs_close, .destroy = xs_destroy, .print_stats = xs_local_print_stats, @@ -2710,7 +2710,7 @@ static const struct rpc_xprt_ops xs_udp_ops = { .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_udp_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_rtt, + .wait_for_reply_request = xprt_wait_for_reply_request_rtt, .timer = xs_udp_timer, .release_request = xprt_release_rqst_cong, .close = xs_close, @@ -2733,7 +2733,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .prepare_request = xs_stream_prepare_request, .send_request = xs_tcp_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_def, + .wait_for_reply_request = xprt_wait_for_reply_request_def, .close = xs_tcp_shutdown, .destroy = xs_destroy, .set_connect_timeout = xs_tcp_set_connect_timeout, @@ -2761,7 +2761,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = { .buf_alloc = bc_malloc, .buf_free = bc_free, .send_request = bc_send_request, - .set_retrans_timeout = xprt_set_retrans_timeout_def, + .wait_for_reply_request = xprt_wait_for_reply_request_def, .close = bc_close, .destroy = bc_destroy, 
.print_stats = xs_tcp_print_stats, -- cgit From 87150aaed9e55d8b18a94aa2589aa4331429fce8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:47 -0400 Subject: SUNRPC: Refactor rpc_sleep_on() rpc_sleep_on() does not need to set the task->tk_callback under the queue lock, so move that out. Also refactor the check for whether the task is active. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 3d6cb91ba598..8e96a841dd11 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -362,7 +362,6 @@ static void rpc_make_runnable(struct workqueue_struct *wq, */ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, unsigned char queue_priority) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", @@ -372,27 +371,39 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, __rpc_add_wait_queue(q, task, queue_priority); - WARN_ON_ONCE(task->tk_callback != NULL); - task->tk_callback = action; __rpc_add_timer(q, task); } -void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action) +static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action) +{ + if (action && !WARN_ON_ONCE(task->tk_callback != NULL)) + task->tk_callback = action; +} + +static bool rpc_sleep_check_activated(struct rpc_task *task) { /* We shouldn't ever put an inactive task to sleep */ - WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); - if (!RPC_IS_ACTIVATED(task)) { + if (WARN_ON_ONCE(!RPC_IS_ACTIVATED(task))) { task->tk_status = -EIO; rpc_put_task_async(task); - return; + return false; } + return true; +} + +void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, + rpc_action action) +{ + if (!rpc_sleep_check_activated(task)) + return; + + rpc_set_tk_callback(task, 
action); /* * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on_priority(q, task, action, task->tk_priority); + __rpc_sleep_on_priority(q, task, task->tk_priority); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on); @@ -400,19 +411,16 @@ EXPORT_SYMBOL_GPL(rpc_sleep_on); void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, int priority) { - /* We shouldn't ever put an inactive task to sleep */ - WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); - if (!RPC_IS_ACTIVATED(task)) { - task->tk_status = -EIO; - rpc_put_task_async(task); + if (!rpc_sleep_check_activated(task)) return; - } + + rpc_set_tk_callback(task, action); /* * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); + __rpc_sleep_on_priority(q, task, priority - RPC_PRIORITY_LOW); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); -- cgit From 8357a9b60fe7500699a9dec540ca1c48df3cb455 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:48 -0400 Subject: SUNRPC: Remove unused argument 'action' from rpc_sleep_on_priority() None of the callers set the 'action' argument, so let's just remove it. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 8e96a841dd11..04170c08b2cf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -409,18 +409,17 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, EXPORT_SYMBOL_GPL(rpc_sleep_on); void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, int priority) + int priority) { if (!rpc_sleep_check_activated(task)) return; - rpc_set_tk_callback(task, action); - + priority -= RPC_PRIORITY_LOW; /* * Protect the queue operations. 
*/ spin_lock_bh(&q->lock); - __rpc_sleep_on_priority(q, task, priority - RPC_PRIORITY_LOW); + __rpc_sleep_on_priority(q, task, priority); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); -- cgit From 6b2e6856275d7b8d0acbf06d2e8da72e1a6bc857 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:49 -0400 Subject: SUNRPC: Add function rpc_sleep_on_timeout() Clean up the RPC task sleep interfaces by replacing the task->tk_timeout 'hidden parameter' to rpc_sleep_on() with a new function that takes an absolute timeout. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/auth_gss.c | 5 ++- net/sunrpc/clnt.c | 1 - net/sunrpc/rpcb_clnt.c | 3 +- net/sunrpc/sched.c | 69 ++++++++++++++++++++++++++++++++++-------- net/sunrpc/xprt.c | 36 +++++++++++++--------- 5 files changed, 81 insertions(+), 33 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3fd56c0c90ae..c055edfec55e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -581,8 +581,8 @@ gss_refresh_upcall(struct rpc_task *task) /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. 
*/ warn_gssd(); - task->tk_timeout = 15*HZ; - rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL); + rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue, + task, NULL, jiffies + (15 * HZ)); err = -EAGAIN; goto out; } @@ -595,7 +595,6 @@ gss_refresh_upcall(struct rpc_task *task) if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { - task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ refcount_inc(&gss_msg->count); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af1dfc2a8fb1..216d5e5e3b54 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1851,7 +1851,6 @@ call_bind(struct rpc_task *task) if (!xprt_prepare_transmit(task)) return; - task->tk_timeout = xprt->bind_timeout; xprt->ops->rpcbind(task); } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 41a971ac1c63..18b0cf2a923f 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -694,7 +694,8 @@ void rpcb_getport_async(struct rpc_task *task) /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ - rpc_sleep_on(&xprt->binding, task, NULL); + rpc_sleep_on_timeout(&xprt->binding, task, + NULL, jiffies + xprt->bind_timeout); if (xprt_test_and_set_binding(xprt)) { dprintk("RPC: %5u %s: waiting for another binder\n", diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 04170c08b2cf..7e0f7b83262f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -66,7 +66,7 @@ struct workqueue_struct *xprtiod_workqueue __read_mostly; static void __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (task->tk_timeout == 0) + if (list_empty(&task->u.tk_wait.timer_list)) return; dprintk("RPC: %5u disabling timer\n", task->tk_pid); task->tk_timeout = 0; @@ -86,17 +86,15 @@ rpc_set_queue_timer(struct rpc_wait_queue *queue, 
unsigned long expires) * Set up a timer for the current task. */ static void -__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) +__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task, + unsigned long timeout) { - if (!task->tk_timeout) - return; - dprintk("RPC: %5u setting alarm for %u ms\n", - task->tk_pid, jiffies_to_msecs(task->tk_timeout)); + task->tk_pid, jiffies_to_msecs(timeout - jiffies)); - task->u.tk_wait.expires = jiffies + task->tk_timeout; - if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) - rpc_set_queue_timer(queue, task->u.tk_wait.expires); + task->tk_timeout = timeout; + if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) + rpc_set_queue_timer(queue, timeout); list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } @@ -188,6 +186,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, if (RPC_IS_QUEUED(task)) return; + INIT_LIST_HEAD(&task->u.tk_wait.timer_list); if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); else if (RPC_IS_SWAPPER(task)) @@ -371,7 +370,17 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, __rpc_add_wait_queue(q, task, queue_priority); - __rpc_add_timer(q, task); +} + +static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q, + struct rpc_task *task, unsigned long timeout, + unsigned char queue_priority) +{ + if (time_is_after_jiffies(timeout)) { + __rpc_sleep_on_priority(q, task, queue_priority); + __rpc_add_timer(q, task, timeout); + } else + task->tk_status = -ETIMEDOUT; } static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action) @@ -391,6 +400,23 @@ static bool rpc_sleep_check_activated(struct rpc_task *task) return true; } +void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task, + rpc_action action, unsigned long timeout) +{ + if (!rpc_sleep_check_activated(task)) + return; 
+ + rpc_set_tk_callback(task, action); + + /* + * Protect the queue operations. + */ + spin_lock_bh(&q->lock); + __rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority); + spin_unlock_bh(&q->lock); +} +EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout); + void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action) { @@ -399,6 +425,7 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_set_tk_callback(task, action); + WARN_ON_ONCE(task->tk_timeout != 0); /* * Protect the queue operations. */ @@ -408,12 +435,29 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, } EXPORT_SYMBOL_GPL(rpc_sleep_on); +void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q, + struct rpc_task *task, unsigned long timeout, int priority) +{ + if (!rpc_sleep_check_activated(task)) + return; + + priority -= RPC_PRIORITY_LOW; + /* + * Protect the queue operations. + */ + spin_lock_bh(&q->lock); + __rpc_sleep_on_priority_timeout(q, task, timeout, priority); + spin_unlock_bh(&q->lock); +} +EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout); + void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, int priority) { if (!rpc_sleep_check_activated(task)) return; + WARN_ON_ONCE(task->tk_timeout != 0); priority -= RPC_PRIORITY_LOW; /* * Protect the queue operations. 
@@ -711,7 +755,7 @@ static void __rpc_queue_timer_fn(struct timer_list *t) spin_lock(&queue->lock); expires = now = jiffies; list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { - timeo = task->u.tk_wait.expires; + timeo = task->tk_timeout; if (time_after_eq(now, timeo)) { dprintk("RPC: %5u timeout\n", task->tk_pid); task->tk_status = -ETIMEDOUT; @@ -737,8 +781,7 @@ static void __rpc_atrun(struct rpc_task *task) */ void rpc_delay(struct rpc_task *task, unsigned long delay) { - task->tk_timeout = delay; - rpc_sleep_on(&delay_queue, task, __rpc_atrun); + rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay); } EXPORT_SYMBOL_GPL(rpc_delay); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5afffa669d04..7c3623b17493 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -209,9 +209,12 @@ out_unlock: out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); - task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0; task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->sending, task, NULL); + if (RPC_IS_SOFT(task)) + rpc_sleep_on_timeout(&xprt->sending, task, NULL, + jiffies + req->rq_timeout); + else + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -273,9 +276,12 @@ out_unlock: xprt_clear_locked(xprt); out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); - task->tk_timeout = RPC_IS_SOFT(task) ? 
req->rq_timeout : 0; task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->sending, task, NULL); + if (RPC_IS_SOFT(task)) + rpc_sleep_on_timeout(&xprt->sending, task, NULL, + jiffies + req->rq_timeout); + else + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -787,9 +793,9 @@ void xprt_connect(struct rpc_task *task) xprt->ops->close(xprt); if (!xprt_connected(xprt)) { - task->tk_timeout = task->tk_rqstp->rq_timeout; task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; - rpc_sleep_on(&xprt->pending, task, NULL); + rpc_sleep_on_timeout(&xprt->pending, task, NULL, + jiffies + task->tk_rqstp->rq_timeout); if (test_bit(XPRT_CLOSING, &xprt->state)) return; @@ -1080,8 +1086,8 @@ void xprt_wait_for_reply_request_def(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&req->rq_xprt->pending, task, xprt_timer); + rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer, + jiffies + req->rq_timeout); } EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def); @@ -1099,12 +1105,14 @@ void xprt_wait_for_reply_request_rtt(struct rpc_task *task) struct rpc_rtt *rtt = clnt->cl_rtt; struct rpc_rqst *req = task->tk_rqstp; unsigned long max_timeout = clnt->cl_timeout->to_maxval; + unsigned long timeout; - task->tk_timeout = rpc_calc_rto(rtt, timer); - task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; - if (task->tk_timeout > max_timeout || task->tk_timeout == 0) - task->tk_timeout = max_timeout; - rpc_sleep_on(&req->rq_xprt->pending, task, xprt_timer); + timeout = rpc_calc_rto(rtt, timer); + timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; + if (timeout > max_timeout || timeout == 0) + timeout = max_timeout; + rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer, + jiffies + timeout); } EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt); @@ -1656,7 +1664,6 @@ void xprt_reserve(struct rpc_task *task) if (task->tk_rqstp != NULL) return; - 
task->tk_timeout = 0; task->tk_status = -EAGAIN; if (!xprt_throttle_congested(xprt, task)) xprt_do_reserve(xprt, task); @@ -1679,7 +1686,6 @@ void xprt_retry_reserve(struct rpc_task *task) if (task->tk_rqstp != NULL) return; - task->tk_timeout = 0; task->tk_status = -EAGAIN; xprt_do_reserve(xprt, task); } -- cgit From 5efd1876e61fe61b61e2d056782027c11bcd0982 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:50 -0400 Subject: SUNRPC: Fix up tracking of timeouts Add a helper to ensure that debugfs and friends print out the correct current task timeout value. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 2 +- net/sunrpc/debugfs.c | 2 +- net/sunrpc/sched.c | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 216d5e5e3b54..b25f317d0ee2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2874,7 +2874,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt, printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n", task->tk_pid, task->tk_flags, task->tk_status, - clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, + clnt, task->tk_rqstp, rpc_task_timeout(task), task->tk_ops, clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), task->tk_action, rpc_waitq); } diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 19bb356230ed..95ebd76b132d 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -33,7 +33,7 @@ tasks_show(struct seq_file *f, void *v) seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n", task->tk_pid, task->tk_flags, task->tk_status, - clnt->cl_clid, xid, task->tk_timeout, task->tk_ops, + clnt->cl_clid, xid, rpc_task_timeout(task), task->tk_ops, clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), task->tk_action, rpc_waitq); return 0; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 7e0f7b83262f..40944c34a9e4 100644 
--- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -58,6 +58,20 @@ static struct rpc_wait_queue delay_queue; struct workqueue_struct *rpciod_workqueue __read_mostly; struct workqueue_struct *xprtiod_workqueue __read_mostly; +unsigned long +rpc_task_timeout(const struct rpc_task *task) +{ + unsigned long timeout = READ_ONCE(task->tk_timeout); + + if (timeout != 0) { + unsigned long now = jiffies; + if (time_before(now, timeout)) + return timeout - now; + } + return 0; +} +EXPORT_SYMBOL_GPL(rpc_task_timeout); + /* * Disable the timer for a given RPC task. Should be called with * queue->lock and bh_disabled in order to avoid races within -- cgit From 24a9d9a21e568f494198eea2bb864e0b6c593051 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:51 -0400 Subject: SUNRPC: Simplify queue timeouts using timer_reduce() Simplify the setting of queue timeouts by using the timer_reduce() function. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 40944c34a9e4..301e0f7f1dc9 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -92,8 +92,7 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) static void rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) { - queue->timer_list.expires = expires; - mod_timer(&queue->timer_list.timer, expires); + timer_reduce(&queue->timer_list.timer, expires); } /* @@ -107,8 +106,7 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task, task->tk_pid, jiffies_to_msecs(timeout - jiffies)); task->tk_timeout = timeout; - if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) - rpc_set_queue_timer(queue, timeout); + rpc_set_queue_timer(queue, timeout); list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } -- cgit From 
431235818bc3a919ca7487500c67c3144feece80 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:52 -0400 Subject: SUNRPC: Declare RPC timers as TIMER_DEFERRABLE Don't wake idle CPUs only for the purpose of servicing an RPC queue timeout. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 4 +++- net/sunrpc/xprt.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 301e0f7f1dc9..1a12fb03e611 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -249,7 +249,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); queue->qlen = 0; - timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0); + timer_setup(&queue->timer_list.timer, + __rpc_queue_timer_fn, + TIMER_DEFERRABLE); INIT_LIST_HEAD(&queue->timer_list.list); rpc_assign_waitqueue_name(queue, qname); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7c3623b17493..36af1a1929af 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1842,7 +1842,9 @@ found: xprt->idle_timeout = 0; INIT_WORK(&xprt->task_cleanup, xprt_autoclose); if (xprt_has_timer(xprt)) - timer_setup(&xprt->timer, xprt_init_autodisconnect, 0); + timer_setup(&xprt->timer, + xprt_init_autodisconnect, + TIMER_DEFERRABLE); else timer_setup(&xprt->timer, NULL, 0); -- cgit From 9e910bff74be819aad751e82270682f3c405d199 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:53 -0400 Subject: SUNRPC: Ensure that the transport layer respect major timeouts Ensure that when in the transport layer, we don't sleep past a major timeout. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 36af1a1929af..642cc0f64e44 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -73,6 +73,15 @@ static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); +static unsigned long xprt_request_timeout(const struct rpc_rqst *req) +{ + unsigned long timeout = jiffies + req->rq_timeout; + + if (time_before(timeout, req->rq_majortimeo)) + return timeout; + return req->rq_majortimeo; +} + /** * xprt_register_transport - register a transport implementation * @transport: transport to register @@ -212,7 +221,7 @@ out_sleep: task->tk_status = -EAGAIN; if (RPC_IS_SOFT(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, - jiffies + req->rq_timeout); + xprt_request_timeout(req)); else rpc_sleep_on(&xprt->sending, task, NULL); return 0; @@ -279,7 +288,7 @@ out_sleep: task->tk_status = -EAGAIN; if (RPC_IS_SOFT(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, - jiffies + req->rq_timeout); + xprt_request_timeout(req)); else rpc_sleep_on(&xprt->sending, task, NULL); return 0; @@ -795,7 +804,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_connected(xprt)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on_timeout(&xprt->pending, task, NULL, - jiffies + task->tk_rqstp->rq_timeout); + xprt_request_timeout(task->tk_rqstp)); if (test_bit(XPRT_CLOSING, &xprt->state)) return; @@ -1087,7 +1096,7 @@ void xprt_wait_for_reply_request_def(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer, - jiffies + req->rq_timeout); + xprt_request_timeout(req)); } EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def); -- cgit From 5ad64b36dda962797ce3ed579a27189ec7482d0d Mon Sep 17 00:00:00 2001 From: 
Trond Myklebust Date: Sun, 7 Apr 2019 13:58:54 -0400 Subject: SUNRPC: Add tracking of RPC level errors Add variables to track RPC level errors so that we can distinguish between issues that arose in the RPC transport layer as opposed to those arising from the reply message. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 40 +++++++++++++++++++++++++++------------- net/sunrpc/xprtsock.c | 1 + 2 files changed, 28 insertions(+), 13 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b25f317d0ee2..315f9c9cb72d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1468,6 +1468,7 @@ static int __rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *)) { task->tk_status = 0; + task->tk_rpc_status = 0; task->tk_action = action; return 1; } @@ -1510,6 +1511,19 @@ const char return "no proc"; } +static void +__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status) +{ + task->tk_rpc_status = rpc_status; + rpc_exit(task, tk_status); +} + +static void +rpc_call_rpcerror(struct rpc_task *task, int status) +{ + __rpc_call_rpcerror(task, status, status); +} + /* * 0. 
Initial state * @@ -1574,7 +1588,7 @@ call_reserveresult(struct rpc_task *task) printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n", __func__, status); - rpc_exit(task, -EIO); + rpc_call_rpcerror(task, -EIO); return; } @@ -1602,7 +1616,7 @@ call_reserveresult(struct rpc_task *task) __func__, status); break; } - rpc_exit(task, status); + rpc_call_rpcerror(task, status); } /* @@ -1670,7 +1684,7 @@ call_refreshresult(struct rpc_task *task) } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", task->tk_pid, __func__, status); - rpc_exit(task, status); + rpc_call_rpcerror(task, status); } /* @@ -1721,7 +1735,7 @@ call_allocate(struct rpc_task *task) if (status == 0) return; if (status != -ENOMEM) { - rpc_exit(task, status); + rpc_call_rpcerror(task, status); return; } @@ -1790,7 +1804,7 @@ call_encode(struct rpc_task *task) task->tk_action = call_refresh; break; default: - rpc_exit(task, task->tk_status); + rpc_call_rpcerror(task, task->tk_status); } return; } else { @@ -1931,7 +1945,7 @@ call_bind_status(struct rpc_task *task) task->tk_pid, -task->tk_status); } - rpc_exit(task, status); + rpc_call_rpcerror(task, status); return; retry_timeout: @@ -1966,7 +1980,7 @@ call_connect(struct rpc_task *task) if (task->tk_status < 0) return; if (task->tk_flags & RPC_TASK_NOCONNECT) { - rpc_exit(task, -ENOTCONN); + rpc_call_rpcerror(task, -ENOTCONN); return; } if (!xprt_prepare_transmit(task)) @@ -2026,7 +2040,7 @@ call_connect_status(struct rpc_task *task) task->tk_action = call_transmit; return; } - rpc_exit(task, status); + rpc_call_rpcerror(task, status); return; out_retry: /* Check for timeouts before looping back to call_bind */ @@ -2111,7 +2125,7 @@ call_transmit_status(struct rpc_task *task) if (!task->tk_msg.rpc_proc->p_proc) trace_xprt_ping(task->tk_xprt, task->tk_status); - rpc_exit(task, task->tk_status); + rpc_call_rpcerror(task, task->tk_status); return; } /* fall through */ @@ -2275,7 +2289,7 @@ call_status(struct rpc_task *task) 
rpc_check_timeout(task); return; out_exit: - rpc_exit(task, status); + rpc_call_rpcerror(task, status); } static bool @@ -2299,7 +2313,7 @@ rpc_check_timeout(struct rpc_task *task) task->tk_timeouts++; if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { - rpc_exit(task, -ETIMEDOUT); + rpc_call_rpcerror(task, -ETIMEDOUT); return; } @@ -2310,9 +2324,9 @@ rpc_check_timeout(struct rpc_task *task) task->tk_xprt->servername); } if (task->tk_flags & RPC_TASK_TIMEOUT) - rpc_exit(task, -ETIMEDOUT); + rpc_call_rpcerror(task, -ETIMEDOUT); else - rpc_exit(task, -EIO); + __rpc_call_rpcerror(task, -EIO, -ETIMEDOUT); return; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b4b4b8db143c..c69951ed2ebc 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2017,6 +2017,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) * we'll need to figure out how to pass a namespace to * connect. */ + task->tk_rpc_status = -ENOTCONN; rpc_exit(task, -ENOTCONN); return; } -- cgit From e4ec48d3cc6139f4c1a934ff25d440cd4d50279f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:55 -0400 Subject: SUNRPC: Make "no retrans timeout" soft tasks behave like softconn for timeouts If a soft NFSv4 request is sent, then we don't need it to time out unless the connection breaks. The reason is that as long as the connection is unbroken, the protocol states that the server is not allowed to drop the request. IOW: as long as the connection remains unbroken, the client may assume that all transmitted RPC requests are being processed by the server, and that retransmissions and timeouts of those requests are unwarranted. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 315f9c9cb72d..43d6815f7391 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2318,6 +2318,15 @@ rpc_check_timeout(struct rpc_task *task) } if (RPC_IS_SOFT(task)) { + /* + * Once a "no retrans timeout" soft tasks (a.k.a NFSv4) has + * been sent, it should time out only if the transport + * connection gets terminally broken. + */ + if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) && + rpc_check_connected(task->tk_rqstp)) + return; + if (clnt->cl_chatty) { printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_program->name, -- cgit From da953063bdce465d941751d981e8d3ac5e92906c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:56 -0400 Subject: SUNRPC: Start the first major timeout calculation at task creation When calculating the major timeout for a new task, when we know that the connection has been broken, use the task->tk_start to ensure that we also take into account the time spent waiting for a slot or session slot. This ensures that we fail over soft requests relatively quickly once the connection has actually been broken, and the first requests have started to fail. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 642cc0f64e44..bc1c8247750d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -569,18 +569,44 @@ bool xprt_write_space(struct rpc_xprt *xprt) } EXPORT_SYMBOL_GPL(xprt_write_space); -static void xprt_reset_majortimeo(struct rpc_rqst *req) +static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime) +{ + s64 delta = ktime_to_ns(ktime_get() - abstime); + return likely(delta >= 0) ? + jiffies - nsecs_to_jiffies(delta) : + jiffies + nsecs_to_jiffies(-delta); +} + +static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) { const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; + unsigned long majortimeo = req->rq_timeout; - req->rq_majortimeo = req->rq_timeout; if (to->to_exponential) - req->rq_majortimeo <<= to->to_retries; + majortimeo <<= to->to_retries; + else + majortimeo += to->to_increment * to->to_retries; + if (majortimeo > to->to_maxval || majortimeo == 0) + majortimeo = to->to_maxval; + return majortimeo; +} + +static void xprt_reset_majortimeo(struct rpc_rqst *req) +{ + req->rq_majortimeo += xprt_calc_majortimeo(req); +} + +static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) +{ + unsigned long time_init; + struct rpc_xprt *xprt = req->rq_xprt; + + if (likely(xprt && xprt_connected(xprt))) + time_init = jiffies; else - req->rq_majortimeo += to->to_increment * to->to_retries; - if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0) - req->rq_majortimeo = to->to_maxval; - req->rq_majortimeo += jiffies; + time_init = xprt_abs_ktime_to_jiffies(task->tk_start); + req->rq_timeout = task->tk_client->cl_timeout->to_initval; + req->rq_majortimeo = time_init + xprt_calc_majortimeo(req); } /** @@ -997,7 +1023,6 @@ 
xprt_request_enqueue_receive(struct rpc_task *task) set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); spin_unlock(&xprt->queue_lock); - xprt_reset_majortimeo(req); /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); } @@ -1631,7 +1656,6 @@ xprt_request_init(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; - req->rq_timeout = task->tk_client->cl_timeout->to_initval; req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; @@ -1644,7 +1668,7 @@ xprt_request_init(struct rpc_task *task) req->rq_snd_buf.bvec = NULL; req->rq_rcv_buf.bvec = NULL; req->rq_release_snd_buf = NULL; - xprt_reset_majortimeo(req); + xprt_init_majortimeo(task, req); dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, req, ntohl(req->rq_xid)); } -- cgit From 0729d995f2a2726598642d552ebe916b43aef73d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:57 -0400 Subject: SUNRPC: Ensure to ratelimit the "server not responding" syslog messages In particular, the timeout messages can be very noisy, so we ought to ratelimit them in order to avoid spamming the syslog. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 43d6815f7391..b16322eb6c42 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2328,7 +2328,8 @@ rpc_check_timeout(struct rpc_task *task) return; if (clnt->cl_chatty) { - printk(KERN_NOTICE "%s: server %s not responding, timed out\n", + pr_notice_ratelimited( + "%s: server %s not responding, timed out\n", clnt->cl_program->name, task->tk_xprt->servername); } @@ -2342,9 +2343,10 @@ rpc_check_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; if (clnt->cl_chatty) { - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_program->name, - task->tk_xprt->servername); + pr_notice_ratelimited( + "%s: server %s not responding, still trying\n", + clnt->cl_program->name, + task->tk_xprt->servername); } } rpc_force_rebind(clnt); @@ -2374,7 +2376,7 @@ call_decode(struct rpc_task *task) if (task->tk_flags & RPC_CALL_MAJORSEEN) { if (clnt->cl_chatty) { - printk(KERN_NOTICE "%s: server %s OK\n", + pr_notice_ratelimited("%s: server %s OK\n", clnt->cl_program->name, task->tk_xprt->servername); } -- cgit From ae6ec918474597a13a2648c54b6f12fb8cf0a55e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:58:58 -0400 Subject: SUNRPC: Add the 'softerr' rpc_client flag Add the 'softerr' rpc client flag that sets the RPC_TASK_TIMEOUT flag on all new rpc tasks that are attached to that rpc client. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b16322eb6c42..e933f1185317 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -484,8 +484,11 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, } clnt->cl_softrtry = 1; - if (args->flags & RPC_CLNT_CREATE_HARDRTRY) + if (args->flags & (RPC_CLNT_CREATE_HARDRTRY|RPC_CLNT_CREATE_SOFTERR)) { clnt->cl_softrtry = 0; + if (args->flags & RPC_CLNT_CREATE_SOFTERR) + clnt->cl_softerr = 1; + } if (args->flags & RPC_CLNT_CREATE_AUTOBIND) clnt->cl_autobind = 1; @@ -623,6 +626,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_softrtry = clnt->cl_softrtry; + new->cl_softerr = clnt->cl_softerr; new->cl_noretranstimeo = clnt->cl_noretranstimeo; new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; @@ -1001,6 +1005,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; + if (clnt->cl_softerr) + task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; if (atomic_read(&clnt->cl_swapper)) -- cgit From 52db6f9a0cd8f6d433a0687aae4f21f209352510 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:38:55 -0400 Subject: SUNRPC: Avoid digging into the ATOMIC pool Page allocation requests made when the SPARSE_PAGES flag is set are allowed to fail, and are not critical. No need to spend a rare resource. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/socklib.c | 2 +- net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 7e55cfc69697..9faea12624a6 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -106,7 +106,7 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb /* ACL likes to be lazy in allocating pages - ACLs * are small by default but can get huge. */ if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) { - *ppage = alloc_page(GFP_ATOMIC); + *ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN); if (unlikely(*ppage == NULL)) { if (copied == 0) copied = -ENOMEM; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 6c1fb270f127..b759b169dadf 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -238,7 +238,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, */ if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) { if (!*ppages) - *ppages = alloc_page(GFP_ATOMIC); + *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN); if (!*ppages) return -ENOBUFS; } -- cgit From b2ca473b920dfbaad7c4f9eb5043258ef71f321f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:00 -0400 Subject: xprtrdma: Fix an frwr_map recovery nit After a DMA map failure in frwr_map, mark the MR so that recycling won't attempt to DMA unmap it. 
Signed-off-by: Chuck Lever Fixes: e2f34e26710b ("xprtrdma: Yet another double DMA-unmap") Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 52cb6c1b0c2b..a2a2e01cb5dd 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -466,7 +466,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, return seg; out_dmamap_err: - frwr->fr_state = FRWR_IS_INVALID; + mr->mr_dir = DMA_NONE; trace_xprtrdma_frwr_sgerr(mr, i); rpcrdma_mr_put(mr); return ERR_PTR(-EIO); -- cgit From 1769e6a816dff50d960271eb780e0a40b739b256 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:05 -0400 Subject: xprtrdma: Clean up rpcrdma_create_req() Eventually, I'd like to invoke rpcrdma_create_req() during the call_reserve step. Memory allocation there probably needs to use GFP_NOIO. Therefore a set of GFP flags needs to be passed in. As an additional clean up, just return a pointer or NULL, because the only error return code here is -ENOMEM. 
Lastly, clean up the function names to be consistent with the pattern: "rpcrdma" _ object-type _ action Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 6 +++--- net/sunrpc/xprtrdma/verbs.c | 29 ++++++++++++++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 ++- 3 files changed, 21 insertions(+), 17 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index d79b18c1f4cd..713961a63c49 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -31,9 +31,9 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, struct rpcrdma_regbuf *rb; size_t size; - req = rpcrdma_create_req(r_xprt); - if (IS_ERR(req)) - return PTR_ERR(req); + req = rpcrdma_req_create(r_xprt, GFP_KERNEL); + if (!req) + return -ENOMEM; rqst = &req->rl_slot; rqst->rq_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 30cfc0efe699..71fc41f9a8eb 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -996,22 +996,27 @@ rpcrdma_mr_refresh_worker(struct work_struct *work) rpcrdma_mrs_create(r_xprt); } -struct rpcrdma_req * -rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) +/** + * rpcrdma_req_create - Allocate an rpcrdma_req object + * @r_xprt: controlling r_xprt + * @flags: GFP flags passed to memory allocators + * + * Returns an allocated and fully initialized rpcrdma_req or NULL. 
+ */ +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, gfp_t flags) { struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; struct rpcrdma_regbuf *rb; struct rpcrdma_req *req; - req = kzalloc(sizeof(*req), GFP_KERNEL); + req = kzalloc(sizeof(*req), flags); if (req == NULL) - return ERR_PTR(-ENOMEM); + return NULL; - rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, - DMA_TO_DEVICE, GFP_KERNEL); + rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags); if (IS_ERR(rb)) { kfree(req); - return ERR_PTR(-ENOMEM); + return NULL; } req->rl_rdmabuf = rb; xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); @@ -1086,16 +1091,14 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) INIT_LIST_HEAD(&buf->rb_send_bufs); INIT_LIST_HEAD(&buf->rb_allreqs); + + rc = -ENOMEM; for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; - req = rpcrdma_create_req(r_xprt); - if (IS_ERR(req)) { - dprintk("RPC: %s: request buffer %d alloc" - " failed\n", __func__, i); - rc = PTR_ERR(req); + req = rpcrdma_req_create(r_xprt, GFP_KERNEL); + if (!req) goto out; - } list_add(&req->rl_list, &buf->rb_send_bufs); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 10f6593e1a6a..539558fc9c62 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -528,7 +528,8 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, /* * Buffer calls - xprtrdma/verbs.c */ -struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, + gfp_t flags); void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); -- cgit From 23146500b32fbee7eaa57c5002fcd64e5d9b32ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:11 -0400 Subject: xprtrdma: Clean up rpcrdma_create_rep() and rpcrdma_destroy_rep() For code legibility, 
clean up the function names to be consistent with the pattern: "rpcrdma" _ object-type _ action Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 71fc41f9a8eb..caa6a5df12b0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -76,7 +76,6 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); -static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); @@ -1029,25 +1028,20 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, gfp_t flags) return req; } -static int -rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) +static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; - int rc; - rc = -ENOMEM; rep = kzalloc(sizeof(*rep), GFP_KERNEL); if (rep == NULL) goto out; rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize, DMA_FROM_DEVICE, GFP_KERNEL); - if (IS_ERR(rep->rr_rdmabuf)) { - rc = PTR_ERR(rep->rr_rdmabuf); + if (IS_ERR(rep->rr_rdmabuf)) goto out_free; - } xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base, rdmab_length(rep->rr_rdmabuf)); @@ -1063,12 +1057,12 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) spin_lock(&buf->rb_lock); list_add(&rep->rr_list, &buf->rb_recv_bufs); spin_unlock(&buf->rb_lock); - return 0; + return true; out_free: kfree(rep); out: - return rc; + return false; } int @@ -1124,8 +1118,7 @@ out: return rc; } -static void 
-rpcrdma_destroy_rep(struct rpcrdma_rep *rep) +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) { rpcrdma_free_regbuf(rep->rr_rdmabuf); kfree(rep); @@ -1205,7 +1198,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) rep = list_first_entry(&buf->rb_recv_bufs, struct rpcrdma_rep, rr_list); list_del(&rep->rr_list); - rpcrdma_destroy_rep(rep); + rpcrdma_rep_destroy(rep); } while (!list_empty(&buf->rb_send_bufs)) { @@ -1334,7 +1327,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) } spin_unlock(&buffers->rb_lock); if (rep) - rpcrdma_destroy_rep(rep); + rpcrdma_rep_destroy(rep); } /* @@ -1351,7 +1344,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) list_add(&rep->rr_list, &buffers->rb_recv_bufs); spin_unlock(&buffers->rb_lock); } else { - rpcrdma_destroy_rep(rep); + rpcrdma_rep_destroy(rep); } } @@ -1500,7 +1493,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) list_del(&rep->rr_list); spin_unlock(&buf->rb_lock); if (!rep) { - if (rpcrdma_create_rep(r_xprt, temp)) + if (!rpcrdma_rep_create(r_xprt, temp)) break; continue; } -- cgit From 8cec3dba76a4d9d7da4a7219663b8c4333f14522 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:16 -0400 Subject: xprtrdma: rpcrdma_regbuf alignment Allocate the struct rpcrdma_regbuf separately from the I/O buffer to better guarantee the alignment of the I/O buffer and eliminate the wasted space between the rpcrdma_regbuf metadata and the buffer itself. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 6 +++--- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++-- net/sunrpc/xprtrdma/transport.c | 8 ++++---- net/sunrpc/xprtrdma/verbs.c | 27 ++++++++++++++++----------- net/sunrpc/xprtrdma/xprt_rdma.h | 19 ++++++++++--------- 5 files changed, 35 insertions(+), 29 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 713961a63c49..6170ec7ba504 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -45,10 +45,10 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, size = r_xprt->rx_data.inline_rsize; rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); - if (IS_ERR(rb)) + if (!rb) goto out_fail; req->rl_sendbuf = rb; - xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, + xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(rb), min_t(size_t, size, PAGE_SIZE)); } return 0; @@ -123,7 +123,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf, - req->rl_rdmabuf->rg_base, rqst); + rdmab_data(req->rl_rdmabuf), rqst); p = xdr_reserve_space(&req->rl_stream, 28); if (unlikely(!p)) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index b759b169dadf..cf99c55add1b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -747,8 +747,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) int ret; rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); - xdr_init_encode(xdr, &req->rl_hdrbuf, - req->rl_rdmabuf->rg_base, rqst); + xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf), + rqst); /* Fixed header fields */ ret = -EMSGSIZE; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7e73abe01cfe..ced9812940f7 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ 
b/net/sunrpc/xprtrdma/transport.c @@ -595,7 +595,7 @@ rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, return true; rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); - if (IS_ERR(rb)) + if (!rb) return false; rpcrdma_free_regbuf(req->rl_sendbuf); @@ -625,7 +625,7 @@ rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, return true; rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); - if (IS_ERR(rb)) + if (!rb) return false; rpcrdma_free_regbuf(req->rl_recvbuf); @@ -660,8 +660,8 @@ xprt_rdma_allocate(struct rpc_task *task) if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; - rqst->rq_buffer = req->rl_sendbuf->rg_base; - rqst->rq_rbuffer = req->rl_recvbuf->rg_base; + rqst->rq_buffer = rdmab_data(req->rl_sendbuf); + rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf); trace_xprtrdma_op_allocate(task, req); return 0; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index caa6a5df12b0..f88fd3934f56 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1013,12 +1013,12 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, gfp_t flags) return NULL; rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags); - if (IS_ERR(rb)) { + if (!rb) { kfree(req); return NULL; } req->rl_rdmabuf = rb; - xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); + xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); req->rl_buffer = buffer; INIT_LIST_HEAD(&req->rl_registered); @@ -1040,9 +1040,9 @@ static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize, DMA_FROM_DEVICE, GFP_KERNEL); - if (IS_ERR(rep->rr_rdmabuf)) + if (!rep->rr_rdmabuf) goto out_free; - xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base, + xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), rdmab_length(rep->rr_rdmabuf)); rep->rr_cqe.done = rpcrdma_wc_receive; @@ -1354,8 
+1354,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) * @direction: direction of data movement * @flags: GFP flags * - * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that - * can be persistently DMA-mapped for I/O. + * Returns a pointer to a rpcrdma_regbuf object, or NULL. * * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for * receiving the payload of RDMA RECV operations. During Long Calls @@ -1367,14 +1366,18 @@ rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, { struct rpcrdma_regbuf *rb; - rb = kmalloc(sizeof(*rb) + size, flags); - if (rb == NULL) - return ERR_PTR(-ENOMEM); + rb = kmalloc(sizeof(*rb), flags); + if (!rb) + return NULL; + rb->rg_data = kmalloc(size, flags); + if (!rb->rg_data) { + kfree(rb); + return NULL; + } rb->rg_device = NULL; rb->rg_direction = direction; rb->rg_iov.length = size; - return rb; } @@ -1392,7 +1395,7 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) return false; rb->rg_iov.addr = ib_dma_map_single(device, - (void *)rb->rg_base, + rdmab_data(rb), rdmab_length(rb), rb->rg_direction); if (ib_dma_mapping_error(device, rdmab_addr(rb))) { @@ -1427,6 +1430,8 @@ void rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) { rpcrdma_dma_unmap_regbuf(rb); + if (rb) + kfree(rb->rg_data); kfree(rb); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 539558fc9c62..1af9674572bd 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -121,33 +121,34 @@ struct rpcrdma_regbuf { struct ib_sge rg_iov; struct ib_device *rg_device; enum dma_data_direction rg_direction; - __be32 rg_base[0] __attribute__ ((aligned(256))); + void *rg_data; }; -static inline u64 -rdmab_addr(struct rpcrdma_regbuf *rb) +static inline u64 rdmab_addr(struct rpcrdma_regbuf *rb) { return rb->rg_iov.addr; } -static inline u32 -rdmab_length(struct rpcrdma_regbuf *rb) +static inline u32 rdmab_length(struct rpcrdma_regbuf *rb) { return 
rb->rg_iov.length; } -static inline u32 -rdmab_lkey(struct rpcrdma_regbuf *rb) +static inline u32 rdmab_lkey(struct rpcrdma_regbuf *rb) { return rb->rg_iov.lkey; } -static inline struct ib_device * -rdmab_device(struct rpcrdma_regbuf *rb) +static inline struct ib_device *rdmab_device(struct rpcrdma_regbuf *rb) { return rb->rg_device; } +static inline void *rdmab_data(const struct rpcrdma_regbuf *rb) +{ + return rb->rg_data; +} + #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) /* To ensure a transport can always make forward progress, -- cgit From bb93a1ae2bf4f6eb3cedf05a2ea4a2e6a80712e6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:21 -0400 Subject: xprtrdma: Allocate req's regbufs at xprt create time Allocating an rpcrdma_req's regbufs at xprt create time enables a pair of micro-optimizations: First, if these regbufs are always there, we can eliminate two conditional branches from the hot xprt_rdma_allocate path. Second, by allocating a 1KB buffer, it places a lower bound on the size of these buffers, without adding yet another conditional branch. The lower bound reduces the number of hardway re-allocations. In fact, for some workloads it completely eliminates hardway allocations. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 18 ++++-------------- net/sunrpc/xprtrdma/transport.c | 4 ++-- net/sunrpc/xprtrdma/verbs.c | 34 ++++++++++++++++++++++++++-------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 4 files changed, 33 insertions(+), 25 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 6170ec7ba504..e1a125ad888d 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -28,10 +28,10 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, unsigned int i; for (i = 0; i < (count << 1); i++) { - struct rpcrdma_regbuf *rb; size_t size; - req = rpcrdma_req_create(r_xprt, GFP_KERNEL); + size = min_t(size_t, r_xprt->rx_data.inline_rsize, PAGE_SIZE); + req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); if (!req) return -ENOMEM; rqst = &req->rl_slot; @@ -42,20 +42,10 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, spin_lock(&xprt->bc_pa_lock); list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); spin_unlock(&xprt->bc_pa_lock); - - size = r_xprt->rx_data.inline_rsize; - rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); - if (!rb) - goto out_fail; - req->rl_sendbuf = rb; - xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(rb), - min_t(size_t, size, PAGE_SIZE)); + xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), + size); } return 0; - -out_fail: - rpcrdma_req_destroy(req); - return -ENOMEM; } /** diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ced9812940f7..a5da43f3b035 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -591,7 +591,7 @@ rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, { struct rpcrdma_regbuf *rb; - if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size) + if (likely(rdmab_length(req->rl_sendbuf) >= size)) return true; rb = 
rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); @@ -621,7 +621,7 @@ rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, { struct rpcrdma_regbuf *rb; - if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size) + if (likely(rdmab_length(req->rl_recvbuf) >= size)) return true; rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f88fd3934f56..77e0f21c9017 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -998,11 +998,13 @@ rpcrdma_mr_refresh_worker(struct work_struct *work) /** * rpcrdma_req_create - Allocate an rpcrdma_req object * @r_xprt: controlling r_xprt + * @size: initial size, in bytes, of send and receive buffers * @flags: GFP flags passed to memory allocators * * Returns an allocated and fully initialized rpcrdma_req or NULL. */ -struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, gfp_t flags) +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, + gfp_t flags) { struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; struct rpcrdma_regbuf *rb; @@ -1010,22 +1012,37 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, gfp_t flags) req = kzalloc(sizeof(*req), flags); if (req == NULL) - return NULL; + goto out1; rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags); - if (!rb) { - kfree(req); - return NULL; - } + if (!rb) + goto out2; req->rl_rdmabuf = rb; xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); + + req->rl_sendbuf = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); + if (!req->rl_sendbuf) + goto out3; + + req->rl_recvbuf = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); + if (!req->rl_recvbuf) + goto out4; + req->rl_buffer = buffer; INIT_LIST_HEAD(&req->rl_registered); - spin_lock(&buffer->rb_lock); list_add(&req->rl_all, &buffer->rb_allreqs); spin_unlock(&buffer->rb_lock); return req; + +out4: + kfree(req->rl_sendbuf); +out3: + 
kfree(req->rl_rdmabuf); +out2: + kfree(req); +out1: + return NULL; } static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) @@ -1090,7 +1107,8 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; - req = rpcrdma_req_create(r_xprt, GFP_KERNEL); + req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE, + GFP_KERNEL); if (!req) goto out; list_add(&req->rl_list, &buf->rb_send_bufs); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 1af9674572bd..03d5ce443bf0 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -529,7 +529,7 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, /* * Buffer calls - xprtrdma/verbs.c */ -struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, gfp_t flags); void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); -- cgit From 0f665ceb71a20520bdce76fb63ad68c21841aa62 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:27 -0400 Subject: xprtrdma: De-duplicate "allocate new, free old regbuf" Clean up by providing an API to do this common task. At this point, the difference between rpcrdma_get_sendbuf and rpcrdma_get_recvbuf has become tiny. These can be collapsed into a single helper. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 59 +++++++++-------------------------------- net/sunrpc/xprtrdma/verbs.c | 25 +++++++++++++++++ net/sunrpc/xprtrdma/xprt_rdma.h | 2 ++ 3 files changed, 39 insertions(+), 47 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a5da43f3b035..8cf4fa36ed66 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -585,52 +585,15 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) rpc_wake_up_next(&xprt->backlog); } -static bool -rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, - size_t size, gfp_t flags) +static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_regbuf *rb, size_t size, + gfp_t flags) { - struct rpcrdma_regbuf *rb; - - if (likely(rdmab_length(req->rl_sendbuf) >= size)) - return true; - - rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); - if (!rb) - return false; - - rpcrdma_free_regbuf(req->rl_sendbuf); - r_xprt->rx_stats.hardway_register_count += size; - req->rl_sendbuf = rb; - return true; -} - -/* The rq_rcv_buf is used only if a Reply chunk is necessary. - * The decision to use a Reply chunk is made later in - * rpcrdma_marshal_req. This buffer is registered at that time. - * - * Otherwise, the associated RPC Reply arrives in a separate - * Receive buffer, arbitrarily chosen by the HCA. The buffer - * allocated here for the RPC Reply is not utilized in that - * case. See rpcrdma_inline_fixup. - * - * A regbuf is used here to remember the buffer size. 
- */ -static bool -rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, - size_t size, gfp_t flags) -{ - struct rpcrdma_regbuf *rb; - - if (likely(rdmab_length(req->rl_recvbuf) >= size)) - return true; - - rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); - if (!rb) - return false; - - rpcrdma_free_regbuf(req->rl_recvbuf); - r_xprt->rx_stats.hardway_register_count += size; - req->rl_recvbuf = rb; + if (unlikely(rdmab_length(rb) < size)) { + if (!rpcrdma_regbuf_realloc(rb, size, flags)) + return false; + r_xprt->rx_stats.hardway_register_count += size; + } return true; } @@ -655,9 +618,11 @@ xprt_rdma_allocate(struct rpc_task *task) if (RPC_IS_SWAPPER(task)) flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; - if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) + if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, + flags)) goto out_fail; - if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) + if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize, + flags)) goto out_fail; rqst->rq_buffer = rdmab_data(req->rl_sendbuf); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 77e0f21c9017..734dfe5d18bd 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1399,6 +1399,31 @@ rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, return rb; } +/** + * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer + * @rb: regbuf to reallocate + * @size: size of buffer to be allocated, in bytes + * @flags: GFP flags + * + * Returns true if reallocation was successful. If false is + * returned, @rb is left untouched. 
+ */ +bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) +{ + void *buf; + + buf = kmalloc(size, flags); + if (!buf) + return false; + + rpcrdma_dma_unmap_regbuf(rb); + kfree(rb->rg_data); + + rb->rg_data = buf; + rb->rg_iov.length = size; + return true; +} + /** * __rpcrdma_map_regbuf - DMA-map a regbuf * @ia: controlling rpcrdma_ia diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 03d5ce443bf0..751d4761d682 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -552,6 +552,8 @@ void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, gfp_t); +bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, + gfp_t flags); bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); void rpcrdma_free_regbuf(struct rpcrdma_regbuf *); -- cgit From d2832af38dfd1d3b135b13c6106b2c5de16a6747 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:32 -0400 Subject: xprtrdma: Clean up regbuf helpers For code legibility, clean up the function names to be consistent with the pattern: "rpcrdma" _ object-type _ action Also rpcrdma_regbuf_alloc and rpcrdma_regbuf_free no longer have any callers outside of verbs.c, and can thus be made static. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 47 +++++++++++----------- net/sunrpc/xprtrdma/verbs.c | 88 ++++++++++++++++++----------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 27 ++++++++----- 3 files changed, 80 insertions(+), 82 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index cf99c55add1b..231a44b9c152 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -536,22 +536,21 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) /* Prepare an SGE for the RPC-over-RDMA transport header. 
*/ -static bool -rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, - u32 len) +static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, u32 len) { struct rpcrdma_sendctx *sc = req->rl_sendctx; struct rpcrdma_regbuf *rb = req->rl_rdmabuf; struct ib_sge *sge = sc->sc_sges; - if (!rpcrdma_dma_map_regbuf(ia, rb)) + if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) goto out_regbuf; sge->addr = rdmab_addr(rb); sge->length = len; sge->lkey = rdmab_lkey(rb); - ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, - sge->length, DMA_TO_DEVICE); + ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, + DMA_TO_DEVICE); sc->sc_wr.num_sge++; return true; @@ -563,22 +562,21 @@ out_regbuf: /* Prepare the Send SGEs. The head and tail iovec, and each entry * in the page list, gets its own SGE. */ -static bool -rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, - struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) +static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, + struct xdr_buf *xdr, + enum rpcrdma_chunktype rtype) { struct rpcrdma_sendctx *sc = req->rl_sendctx; unsigned int sge_no, page_base, len, remaining; struct rpcrdma_regbuf *rb = req->rl_sendbuf; - struct ib_device *device = ia->ri_device; struct ib_sge *sge = sc->sc_sges; - u32 lkey = ia->ri_pd->local_dma_lkey; struct page *page, **ppages; /* The head iovec is straightforward, as it is already * DMA-mapped. Sync the content that has changed. 
*/ - if (!rpcrdma_dma_map_regbuf(ia, rb)) + if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) goto out_regbuf; sge_no = 1; sge[sge_no].addr = rdmab_addr(rb); @@ -626,13 +624,14 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, goto out_mapping_overflow; len = min_t(u32, PAGE_SIZE - page_base, remaining); - sge[sge_no].addr = ib_dma_map_page(device, *ppages, - page_base, len, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(device, sge[sge_no].addr)) + sge[sge_no].addr = + ib_dma_map_page(rdmab_device(rb), *ppages, + page_base, len, DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdmab_device(rb), + sge[sge_no].addr)) goto out_mapping_err; sge[sge_no].length = len; - sge[sge_no].lkey = lkey; + sge[sge_no].lkey = rdmab_lkey(rb); sc->sc_unmap_count++; ppages++; @@ -653,13 +652,13 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, map_tail: sge_no++; - sge[sge_no].addr = ib_dma_map_page(device, page, - page_base, len, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(device, sge[sge_no].addr)) + sge[sge_no].addr = + ib_dma_map_page(rdmab_device(rb), page, page_base, len, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr)) goto out_mapping_err; sge[sge_no].length = len; - sge[sge_no].lkey = lkey; + sge[sge_no].lkey = rdmab_lkey(rb); sc->sc_unmap_count++; } @@ -707,11 +706,11 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, req->rl_sendctx->sc_req = req; __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags); - if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen)) + if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) return -EIO; if (rtype != rpcrdma_areadch) - if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype)) + if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype)) return -EIO; return 0; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 734dfe5d18bd..81548fc2ee7f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -76,7 +76,11 @@ 
static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); -static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); +static struct rpcrdma_regbuf * +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, + gfp_t flags); +static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); +static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); /* Wait for outstanding transport work to finish. @@ -437,11 +441,11 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) * mappings and MRs are gone. */ list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list) - rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf); + rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); list_for_each_entry(req, &buf->rb_allreqs, rl_all) { - rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf); - rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); - rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); + rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf); + rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); + rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); } rpcrdma_mrs_destroy(buf); ib_dealloc_pd(ia->ri_pd); @@ -1014,17 +1018,17 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, if (req == NULL) goto out1; - rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags); + rb = rpcrdma_regbuf_alloc(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags); if (!rb) goto out2; req->rl_rdmabuf = rb; xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); - req->rl_sendbuf = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); + req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); if (!req->rl_sendbuf) goto out3; - req->rl_recvbuf = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); + req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); if (!req->rl_recvbuf) goto out4; @@ -1055,7 +1059,7 @@ static bool 
rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) if (rep == NULL) goto out; - rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize, + rep->rr_rdmabuf = rpcrdma_regbuf_alloc(cdata->inline_rsize, DMA_FROM_DEVICE, GFP_KERNEL); if (!rep->rr_rdmabuf) goto out_free; @@ -1138,7 +1142,7 @@ out: static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) { - rpcrdma_free_regbuf(rep->rr_rdmabuf); + rpcrdma_regbuf_free(rep->rr_rdmabuf); kfree(rep); } @@ -1154,9 +1158,9 @@ rpcrdma_req_destroy(struct rpcrdma_req *req) { list_del(&req->rl_all); - rpcrdma_free_regbuf(req->rl_recvbuf); - rpcrdma_free_regbuf(req->rl_sendbuf); - rpcrdma_free_regbuf(req->rl_rdmabuf); + rpcrdma_regbuf_free(req->rl_recvbuf); + rpcrdma_regbuf_free(req->rl_sendbuf); + rpcrdma_regbuf_free(req->rl_rdmabuf); kfree(req); } @@ -1366,20 +1370,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) } } -/** - * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers - * @size: size of buffer to be allocated, in bytes - * @direction: direction of data movement - * @flags: GFP flags - * - * Returns a pointer to a rpcrdma_regbuf object, or NULL. +/* Returns a pointer to a rpcrdma_regbuf object, or NULL. * * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for * receiving the payload of RDMA RECV operations. During Long Calls * or Replies they may be registered externally via frwr_map. 
*/ -struct rpcrdma_regbuf * -rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, +static struct rpcrdma_regbuf * +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, gfp_t flags) { struct rpcrdma_regbuf *rb; @@ -1416,7 +1414,7 @@ bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) if (!buf) return false; - rpcrdma_dma_unmap_regbuf(rb); + rpcrdma_regbuf_dma_unmap(rb); kfree(rb->rg_data); rb->rg_data = buf; @@ -1425,34 +1423,33 @@ bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) } /** - * __rpcrdma_map_regbuf - DMA-map a regbuf - * @ia: controlling rpcrdma_ia + * __rpcrdma_regbuf_dma_map - DMA-map a regbuf + * @r_xprt: controlling transport instance * @rb: regbuf to be mapped + * + * Returns true if the buffer is now DMA mapped to @r_xprt's device */ -bool -__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_regbuf *rb) { - struct ib_device *device = ia->ri_device; + struct ib_device *device = r_xprt->rx_ia.ri_device; if (rb->rg_direction == DMA_NONE) return false; - rb->rg_iov.addr = ib_dma_map_single(device, - rdmab_data(rb), - rdmab_length(rb), - rb->rg_direction); + rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb), + rdmab_length(rb), rb->rg_direction); if (ib_dma_mapping_error(device, rdmab_addr(rb))) { trace_xprtrdma_dma_maperr(rdmab_addr(rb)); return false; } rb->rg_device = device; - rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; + rb->rg_iov.lkey = r_xprt->rx_ia.ri_pd->local_dma_lkey; return true; } -static void -rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) +static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb) { if (!rb) return; @@ -1460,19 +1457,14 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) if (!rpcrdma_regbuf_is_mapped(rb)) return; - ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), - rdmab_length(rb), rb->rg_direction); + 
ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb), + rb->rg_direction); rb->rg_device = NULL; } -/** - * rpcrdma_free_regbuf - deregister and free registered buffer - * @rb: regbuf to be deregistered and freed - */ -void -rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) +static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) { - rpcrdma_dma_unmap_regbuf(rb); + rpcrdma_regbuf_dma_unmap(rb); if (rb) kfree(rb->rg_data); kfree(rb); @@ -1547,11 +1539,9 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) } rb = rep->rr_rdmabuf; - if (!rpcrdma_regbuf_is_mapped(rb)) { - if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) { - rpcrdma_recv_buffer_put(rep); - break; - } + if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) { + rpcrdma_recv_buffer_put(rep); + break; } trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 751d4761d682..b942c9d322a7 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -550,25 +550,34 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); -struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, - gfp_t); bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags); -bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); -void rpcrdma_free_regbuf(struct rpcrdma_regbuf *); +bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_regbuf *rb); -static inline bool -rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb) +/** + * rpcrdma_regbuf_is_mapped - check if buffer is DMA mapped + * + * Returns true if the buffer is now mapped to rb->rg_device. 
+ */ +static inline bool rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb) { return rb->rg_device != NULL; } -static inline bool -rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +/** + * rpcrdma_regbuf_dma_map - DMA-map a regbuf + * @r_xprt: controlling transport instance + * @rb: regbuf to be mapped + * + * Returns true if the buffer is currently DMA mapped. + */ +static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_regbuf *rb) { if (likely(rpcrdma_regbuf_is_mapped(rb))) return true; - return __rpcrdma_dma_map_regbuf(ia, rb); + return __rpcrdma_regbuf_dma_map(r_xprt, rb); } /* -- cgit From 3f9c7e76934790c53a48b11c7ad54770cd3ae50d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:37 -0400 Subject: xprtrdma: Backchannel can use GFP_KERNEL allocations The Receive handler runs in process context, thus can use on-demand GFP_KERNEL allocations instead of pre-allocation. This makes the xprtrdma backchannel independent of the number of backchannel session slots provisioned by the Upper Layer protocol. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 104 +++++++++++++++----------------------- 1 file changed, 40 insertions(+), 64 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index e1a125ad888d..ae51ef6a897a 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -19,35 +19,6 @@ #undef RPCRDMA_BACKCHANNEL_DEBUG -static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, - unsigned int count) -{ - struct rpc_xprt *xprt = &r_xprt->rx_xprt; - struct rpcrdma_req *req; - struct rpc_rqst *rqst; - unsigned int i; - - for (i = 0; i < (count << 1); i++) { - size_t size; - - size = min_t(size_t, r_xprt->rx_data.inline_rsize, PAGE_SIZE); - req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); - if (!req) - return -ENOMEM; - rqst = &req->rl_slot; - - rqst->rq_xprt = xprt; - INIT_LIST_HEAD(&rqst->rq_bc_list); - __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); - spin_lock(&xprt->bc_pa_lock); - list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); - spin_unlock(&xprt->bc_pa_lock); - xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), - size); - } - return 0; -} - /** * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests * @xprt: transport associated with these backchannel resources @@ -58,34 +29,10 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int rc; - - /* The backchannel reply path returns each rpc_rqst to the - * bc_pa_list _after_ the reply is sent. If the server is - * faster than the client, it can send another backward - * direction request before the rpc_rqst is returned to the - * list. The client rejects the request in this case. 
- * - * Twice as many rpc_rqsts are prepared to ensure there is - * always an rpc_rqst available as soon as a reply is sent. - */ - if (reqs > RPCRDMA_BACKWARD_WRS >> 1) - goto out_err; - - rc = rpcrdma_bc_setup_reqs(r_xprt, reqs); - if (rc) - goto out_free; - r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; + r_xprt->rx_buf.rb_bc_srv_max_requests = RPCRDMA_BACKWARD_WRS >> 1; trace_xprtrdma_cb_setup(r_xprt, reqs); return 0; - -out_free: - xprt_rdma_bc_destroy(xprt, reqs); - -out_err: - pr_err("RPC: %s: setup backchannel transport failed\n", __func__); - return -ENOMEM; } /** @@ -213,6 +160,43 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) spin_unlock(&xprt->bc_pa_lock); } +static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt) +{ + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpcrdma_req *req; + struct rpc_rqst *rqst; + size_t size; + + spin_lock(&xprt->bc_pa_lock); + rqst = list_first_entry_or_null(&xprt->bc_pa_list, struct rpc_rqst, + rq_bc_pa_list); + if (!rqst) + goto create_req; + list_del(&rqst->rq_bc_pa_list); + spin_unlock(&xprt->bc_pa_lock); + return rqst; + +create_req: + spin_unlock(&xprt->bc_pa_lock); + + /* Set a limit to prevent a remote from overrunning our resources. 
+ */ + if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS) + return NULL; + + size = min_t(size_t, r_xprt->rx_data.inline_rsize, PAGE_SIZE); + req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); + if (!req) + return NULL; + + xprt->bc_alloc_count++; + rqst = &req->rl_slot; + rqst->rq_xprt = xprt; + __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); + xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), size); + return rqst; +} + /** * rpcrdma_bc_receive_call - Handle a backward direction call * @r_xprt: transport receiving the call @@ -244,18 +228,10 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, pr_info("RPC: %s: %*ph\n", __func__, size, p); #endif - /* Grab a free bc rqst */ - spin_lock(&xprt->bc_pa_lock); - if (list_empty(&xprt->bc_pa_list)) { - spin_unlock(&xprt->bc_pa_lock); + rqst = rpcrdma_bc_rqst_get(r_xprt); + if (!rqst) goto out_overflow; - } - rqst = list_first_entry(&xprt->bc_pa_list, - struct rpc_rqst, rq_bc_pa_list); - list_del(&rqst->rq_bc_pa_list); - spin_unlock(&xprt->bc_pa_lock); - /* Prepare rqst */ rqst->rq_reply_bytes_recvd = 0; rqst->rq_xid = *p; -- cgit From 4ba02e8d0ea5461d0e55e76c91481f1153f63365 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:42 -0400 Subject: xprtrdma: Increase maximum number of backchannel requests Reflects the change introduced in commit 067c46967160 ("NFSv4.1: Bump the default callback session slot count to 16"). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/xprt_rdma.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index b942c9d322a7..7621e2d0f107 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -103,12 +103,14 @@ struct rpcrdma_ep { /* Pre-allocate extra Work Requests for handling backward receives * and sends. 
This is a fixed value because the Work Queues are - * allocated when the forward channel is set up. + * allocated when the forward channel is set up, long before the + * backchannel is provisioned. This value is two times + * NFS4_DEF_CB_SLOT_TABLE_SIZE. */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) -#define RPCRDMA_BACKWARD_WRS (8) +#define RPCRDMA_BACKWARD_WRS (32) #else -#define RPCRDMA_BACKWARD_WRS (0) +#define RPCRDMA_BACKWARD_WRS (0) #endif /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV -- cgit From 17e4c443c0b433354016df60a7bd3f1c6aac759c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:48 -0400 Subject: xprtrdma: Trace marshaling failures Record an event when rpcrdma_marshal_req returns a non-zero return value to help track down why an xprt close might have occurred. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 231a44b9c152..45cba06655ea 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -875,6 +875,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) return 0; out_err: + trace_xprtrdma_marshal_failed(rqst, ret); switch (ret) { case -EAGAIN: xprt_wait_for_buffer_space(rqst->rq_xprt); -- cgit From dbcc53a52df256880c2ffa4ee45661419435998a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:53 -0400 Subject: xprtrdma: Clean up sendctx functions Minor clean-ups I've stumbled on since sendctx was merged last year. In particular, making Send completion processing more efficient appears to have a measurable impact on IOPS throughput. Note: test_and_clear_bit() returns a value, thus an explicit memory barrier is not necessary. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 27 ++++++++++++--------------- net/sunrpc/xprtrdma/verbs.c | 17 ++++++++--------- net/sunrpc/xprtrdma/xprt_rdma.h | 5 +++-- 3 files changed, 23 insertions(+), 26 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 45cba06655ea..5cb060c87543 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -508,30 +508,26 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, } /** - * rpcrdma_unmap_sendctx - DMA-unmap Send buffers + * rpcrdma_sendctx_unmap - DMA-unmap Send buffer * @sc: sendctx containing SGEs to unmap * */ -void -rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) +void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc) { - struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia; struct ib_sge *sge; - unsigned int count; /* The first two SGEs contain the transport header and * the inline buffer. These are always left mapped so * they can be cheaply re-used. */ - sge = &sc->sc_sges[2]; - for (count = sc->sc_unmap_count; count; ++sge, --count) - ib_dma_unmap_page(ia->ri_device, - sge->addr, sge->length, DMA_TO_DEVICE); + for (sge = &sc->sc_sges[2]; sc->sc_unmap_count; + ++sge, --sc->sc_unmap_count) + ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length, + DMA_TO_DEVICE); - if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) { - smp_mb__after_atomic(); + if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, + &sc->sc_req->rl_flags)) wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES); - } } /* Prepare an SGE for the RPC-over-RDMA transport header. 
@@ -578,6 +574,7 @@ static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt, */ if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) goto out_regbuf; + sc->sc_device = rdmab_device(rb); sge_no = 1; sge[sge_no].addr = rdmab_addr(rb); sge[sge_no].length = xdr->head[0].iov_len; @@ -673,12 +670,12 @@ out_regbuf: return false; out_mapping_overflow: - rpcrdma_unmap_sendctx(sc); + rpcrdma_sendctx_unmap(sc); pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); return false; out_mapping_err: - rpcrdma_unmap_sendctx(sc); + rpcrdma_sendctx_unmap(sc); trace_xprtrdma_dma_maperr(sge[sge_no].addr); return false; } @@ -698,7 +695,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, u32 hdrlen, struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) { - req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf); + req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt); if (!req->rl_sendctx) return -EAGAIN; req->rl_sendctx->sc_wr.num_sge = 0; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 81548fc2ee7f..1ad25190f2d1 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -870,20 +870,20 @@ static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf, /** * rpcrdma_sendctx_get_locked - Acquire a send context - * @buf: transport buffers from which to acquire an unused context + * @r_xprt: controlling transport instance * * Returns pointer to a free send completion context; or NULL if * the queue is empty. * * Usage: Called to acquire an SGE array before preparing a Send WR. * - * The caller serializes calls to this function (per rpcrdma_buffer), - * and provides an effective memory barrier that flushes the new value + * The caller serializes calls to this function (per transport), and + * provides an effective memory barrier that flushes the new value * of rb_sc_head. 
*/ -struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf) +struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_xprt *r_xprt; + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_sendctx *sc; unsigned long next_head; @@ -908,7 +908,6 @@ out_emptyq: * backing up. Cause the caller to pause and try again. */ set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags); - r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); r_xprt->rx_stats.empty_sendctx_q++; return NULL; } @@ -920,7 +919,7 @@ out_emptyq: * Usage: Called from Send completion to return a sendctxt * to the queue. * - * The caller serializes calls to this function (per rpcrdma_buffer). + * The caller serializes calls to this function (per transport). */ static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) @@ -928,7 +927,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; unsigned long next_tail; - /* Unmap SGEs of previously completed by unsignaled + /* Unmap SGEs of previously completed but unsignaled * Sends by walking up the queue until @sc is found. 
*/ next_tail = buf->rb_sc_tail; @@ -936,7 +935,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) next_tail = rpcrdma_sendctx_next(buf, next_tail); /* ORDER: item must be accessed _before_ tail is updated */ - rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]); + rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]); } while (buf->rb_sc_ctxs[next_tail] != sc); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 7621e2d0f107..8afb5fc1de05 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -225,6 +225,7 @@ struct rpcrdma_xprt; struct rpcrdma_sendctx { struct ib_send_wr sc_wr; struct ib_cqe sc_cqe; + struct ib_device *sc_device; struct rpcrdma_xprt *sc_xprt; struct rpcrdma_req *sc_req; unsigned int sc_unmap_count; @@ -536,7 +537,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); -struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); +struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt); struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); void rpcrdma_mr_put(struct rpcrdma_mr *mr); @@ -625,7 +626,7 @@ int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, u32 hdrlen, struct xdr_buf *xdr, enum rpcrdma_chunktype rtype); -void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc); +void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc); int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); void rpcrdma_complete_rqst(struct rpcrdma_rep *rep); -- cgit From c209e49ceac0ff479f79ac5cd2fbf8be80621203 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:39:59 -0400 Subject: xprtrdma: More Send completion batching Instead of using a fixed 
number, allow the amount of Send completion batching to vary based on the client's maximum credit limit. - A larger default gives a small boost to IOPS throughput - Reducing it based on max_requests gives a safe result when the max credit limit is cranked down (eg. when the device has a small max_qp_wr). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 +--- net/sunrpc/xprtrdma/xprt_rdma.h | 10 ---------- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1ad25190f2d1..1ed761a12f86 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -521,9 +521,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_send_sge, ep->rep_attr.cap.max_recv_sge); - /* set trigger for requesting send completion */ - ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH, - cdata->max_requests >> 2); + ep->rep_send_batch = cdata->max_requests >> 3; ep->rep_send_count = ep->rep_send_batch; init_waitqueue_head(&ep->rep_connect_wait); ep->rep_receive_count = 0; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 8afb5fc1de05..f8563937c8c6 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -232,16 +232,6 @@ struct rpcrdma_sendctx { struct ib_sge sc_sges[]; }; -/* Limit the number of SGEs that can be unmapped during one - * Send completion. This caps the amount of work a single - * completion can do before returning to the provider. - * - * Setting this to zero disables Send completion batching. - */ -enum { - RPCRDMA_MAX_SEND_BATCH = 7, -}; - /* * struct rpcrdma_mr - external memory region metadata * -- cgit From f19bd0bbd363fb97756ed83f53f48413d3e601aa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:40:04 -0400 Subject: xprtrdma: Eliminate rpcrdma_ia::ri_device Clean up. 
Since commit 54cbd6b0c6b9 ("xprtrdma: Delay DMA mapping Send and Receive buffers"), a pointer to the device is now saved in each regbuf when it is DMA mapped. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 17 +++++++++-------- net/sunrpc/xprtrdma/verbs.c | 29 +++++++++++++---------------- net/sunrpc/xprtrdma/xprt_rdma.h | 7 +++---- 3 files changed, 25 insertions(+), 28 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index a2a2e01cb5dd..7cd27184ecd1 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -82,13 +82,13 @@ /** * frwr_is_supported - Check if device supports FRWR - * @ia: interface adapter to check + * @device: interface adapter to check * * Returns true if device supports FRWR, otherwise false */ -bool frwr_is_supported(struct rpcrdma_ia *ia) +bool frwr_is_supported(struct ib_device *device) { - struct ib_device_attr *attrs = &ia->ri_device->attrs; + struct ib_device_attr *attrs = &device->attrs; if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) goto out_not_supported; @@ -98,7 +98,7 @@ bool frwr_is_supported(struct rpcrdma_ia *ia) out_not_supported: pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n", - ia->ri_device->name); + device->name); return false; } @@ -131,7 +131,7 @@ frwr_mr_recycle_worker(struct work_struct *work) if (mr->mr_dir != DMA_NONE) { trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, + ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device, mr->mr_sg, mr->mr_nents, mr->mr_dir); mr->mr_dir = DMA_NONE; } @@ -211,7 +211,7 @@ out_list_err: int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { - struct ib_device_attr *attrs = &ia->ri_device->attrs; + struct ib_device_attr *attrs = &ia->ri_id->device->attrs; int max_qp_wr, depth, delta; ia->ri_mrtype = IB_MR_TYPE_MEM_REG; @@ -253,7 +253,7 @@ int frwr_open(struct 
rpcrdma_ia *ia, struct rpcrdma_ep *ep, } while (delta > 0); } - max_qp_wr = ia->ri_device->attrs.max_qp_wr; + max_qp_wr = ia->ri_id->device->attrs.max_qp_wr; max_qp_wr -= RPCRDMA_BACKWARD_WRS; max_qp_wr -= 1; if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) @@ -436,7 +436,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, } mr->mr_dir = rpcrdma_data_dir(writing); - mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); + mr->mr_nents = + ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir); if (!mr->mr_nents) goto out_dmamap_err; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1ed761a12f86..672993cee70d 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -250,7 +250,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) pr_info("rpcrdma: removing device %s for %s:%s\n", - ia->ri_device->name, + ia->ri_id->device->name, rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); #endif set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); @@ -259,7 +259,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) wait_for_completion(&ia->ri_remove_done); ia->ri_id = NULL; - ia->ri_device = NULL; /* Return 1 to ensure the core destroys the id. 
*/ return 1; case RDMA_CM_EVENT_ESTABLISHED: @@ -294,7 +293,7 @@ disconnected: dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__, rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), - ia->ri_device->name, rdma_event_msg(event->event)); + ia->ri_id->device->name, rdma_event_msg(event->event)); return 0; } @@ -373,9 +372,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) rc = PTR_ERR(ia->ri_id); goto out_err; } - ia->ri_device = ia->ri_id->device; - ia->ri_pd = ib_alloc_pd(ia->ri_device, 0); + ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0); if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); @@ -384,12 +382,12 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) switch (xprt_rdma_memreg_strategy) { case RPCRDMA_FRWR: - if (frwr_is_supported(ia)) + if (frwr_is_supported(ia->ri_id->device)) break; /*FALLTHROUGH*/ default: pr_err("rpcrdma: Device %s does not support memreg mode %d\n", - ia->ri_device->name, xprt_rdma_memreg_strategy); + ia->ri_id->device->name, xprt_rdma_memreg_strategy); rc = -EINVAL; goto out_err; } @@ -471,7 +469,6 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) rdma_destroy_id(ia->ri_id); } ia->ri_id = NULL; - ia->ri_device = NULL; /* If the pd is still busy, xprtrdma missed freeing a resource */ if (ia->ri_pd && !IS_ERR(ia->ri_pd)) @@ -491,7 +488,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, unsigned int max_sge; int rc; - max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge, + max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge, RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); @@ -526,16 +523,16 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, init_waitqueue_head(&ep->rep_connect_wait); ep->rep_receive_count = 0; - sendcq = ib_alloc_cq(ia->ri_device, NULL, + sendcq = ib_alloc_cq(ia->ri_id->device, NULL, ep->rep_attr.cap.max_send_wr + 1, - ia->ri_device->num_comp_vectors > 
1 ? 1 : 0, + ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0, IB_POLL_WORKQUEUE); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); goto out1; } - recvcq = ib_alloc_cq(ia->ri_device, NULL, + recvcq = ib_alloc_cq(ia->ri_id->device, NULL, ep->rep_attr.cap.max_recv_wr + 1, 0, IB_POLL_WORKQUEUE); if (IS_ERR(recvcq)) { @@ -561,7 +558,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; ep->rep_remote_cma.responder_resources = - min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom); + min_t(int, U8_MAX, ia->ri_id->device->attrs.max_qp_rd_atom); /* Limit transport retries so client can detect server * GID changes quickly. RPC layer handles re-establishing @@ -673,7 +670,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, */ old = id; rc = -ENETUNREACH; - if (ia->ri_device != id->device) { + if (ia->ri_id->device != id->device) { pr_err("rpcrdma: can't reconnect on different device!\n"); goto out_destroy; } @@ -1296,7 +1293,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) if (mr->mr_dir != DMA_NONE) { trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, + ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device, mr->mr_sg, mr->mr_nents, mr->mr_dir); mr->mr_dir = DMA_NONE; } @@ -1429,7 +1426,7 @@ bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_regbuf *rb) { - struct ib_device *device = r_xprt->rx_ia.ri_device; + struct ib_device *device = r_xprt->rx_ia.ri_id->device; if (rb->rg_direction == DMA_NONE) return false; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f8563937c8c6..40912bb34b64 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -66,11 +66,8 @@ * Interface Adapter -- one per transport instance */ struct rpcrdma_ia { - struct ib_device *ri_device; 
struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; - struct completion ri_done; - struct completion ri_remove_done; int ri_async_rc; unsigned int ri_max_segs; unsigned int ri_max_frwr_depth; @@ -80,6 +77,8 @@ struct rpcrdma_ia { bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; unsigned long ri_flags; + struct completion ri_done; + struct completion ri_remove_done; }; enum { @@ -585,7 +584,7 @@ rpcrdma_data_dir(bool writing) /* Memory registration calls xprtrdma/frwr_ops.c */ -bool frwr_is_supported(struct rpcrdma_ia *); +bool frwr_is_supported(struct ib_device *device); int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata); int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); -- cgit From 1f7d1c73c58a3d07a951ce23acfb4ec91a31d1e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:40:09 -0400 Subject: SUNRPC: Update comments based on recent changes Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index bc1c8247750d..a9d40bc7ebed 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -955,7 +955,7 @@ xprt_is_pinned_rqst(struct rpc_rqst *req) * @req: Request to pin * * Caller must ensure this is atomic with the call to xprt_lookup_rqst() - * so should be holding the xprt receive lock. + * so should be holding xprt->queue_lock. */ void xprt_pin_rqst(struct rpc_rqst *req) { @@ -967,7 +967,7 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst); * xprt_unpin_rqst - Unpin a request on the transport receive list * @req: Request to pin * - * Caller should be holding the xprt receive lock. + * Caller should be holding xprt->queue_lock. 
*/ void xprt_unpin_rqst(struct rpc_rqst *req) { -- cgit From fd5951742dbc8c3695151e3f46b2fe2c4dac3559 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:40:15 -0400 Subject: xprtrdma: Remove rpcrdma_create_data_internal::rsize and wsize Clean up. xprt_rdma_max_inline_{read,write} cannot be set to large values by virtue of proc_dointvec_minmax. The current maximum is RPCRDMA_MAX_INLINE, which is much smaller than RPCRDMA_MAX_SEGS * PAGE_SIZE. The .rsize and .wsize fields are otherwise unused in the current code base, and thus can be removed. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 9 --------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 -- 2 files changed, 11 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8cf4fa36ed66..6b7c84166d13 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -350,17 +350,8 @@ xprt_setup_rdma(struct xprt_create *args) xprt_rdma_format_addresses(xprt, sap); cdata.max_requests = xprt_rdma_slot_table_entries; - - cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ - cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ - cdata.inline_wsize = xprt_rdma_max_inline_write; - if (cdata.inline_wsize > cdata.wsize) - cdata.inline_wsize = cdata.wsize; - cdata.inline_rsize = xprt_rdma_max_inline_read; - if (cdata.inline_rsize > cdata.rsize) - cdata.inline_rsize = cdata.rsize; /* * Create new transport instance, which includes initialized diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 40912bb34b64..d34371d0d0f8 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -419,8 +419,6 @@ enum { */ struct rpcrdma_create_data_internal { unsigned int max_requests; /* max requests (slots) in flight */ - unsigned int rsize; /* mount rsize - max read hdr+data */ - unsigned int wsize; /* mount wsize - max 
write hdr+data */ unsigned int inline_rsize; /* max non-rdma read data payload */ unsigned int inline_wsize; /* max non-rdma write data payload */ }; -- cgit From 94087e978e9b645e07cc0fbdcf4140dda02f3d81 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:40:20 -0400 Subject: xprtrdma: Aggregate the inline settings in struct rpcrdma_ep Clean up. The inline settings are actually a characteristic of the endpoint, and not related to the device. They are also modified after the transport instance is created, so they do not belong in the cdata structure either. Lastly, let's use names that are more natural to RDMA than to NFS: inline_write -> inline_send and inline_read -> inline_recv. The /proc files retain their names to avoid breaking user space. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 6 +++--- net/sunrpc/xprtrdma/rpc_rdma.c | 34 +++++++++++++++++++--------------- net/sunrpc/xprtrdma/transport.c | 4 +--- net/sunrpc/xprtrdma/verbs.c | 24 +++++++++++++----------- net/sunrpc/xprtrdma/xprt_rdma.h | 9 +++++---- 5 files changed, 41 insertions(+), 36 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index ae51ef6a897a..ce986591f213 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -44,10 +44,10 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; size_t maxmsg; - maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize); + maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv); maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE); return maxmsg - RPCRDMA_HDRLEN_MIN; } @@ -184,7 +184,7 @@ create_req: if (xprt->bc_alloc_count >= 
RPCRDMA_BACKWARD_WRS) return NULL; - size = min_t(size_t, r_xprt->rx_data.inline_rsize, PAGE_SIZE); + size = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE); req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); if (!req) return NULL; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5cb060c87543..85115a2e2639 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -105,16 +105,23 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) return size; } +/** + * rpcrdma_set_max_header_sizes - Initialize inline payload sizes + * @r_xprt: transport instance to initialize + * + * The max_inline fields contain the maximum size of an RPC message + * so the marshaling code doesn't have to repeat this calculation + * for every RPC. + */ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - unsigned int maxsegs = ia->ri_max_segs; - - ia->ri_max_inline_write = cdata->inline_wsize - - rpcrdma_max_call_header_size(maxsegs); - ia->ri_max_inline_read = cdata->inline_rsize - - rpcrdma_max_reply_header_size(maxsegs); + unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + + ep->rep_max_inline_send = + ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs); + ep->rep_max_inline_recv = + ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs); } /* The client can send a request inline as long as the RPCRDMA header @@ -131,7 +138,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdr = &rqst->rq_snd_buf; unsigned int count, remaining, offset; - if (xdr->len > r_xprt->rx_ia.ri_max_inline_write) + if (xdr->len > r_xprt->rx_ep.rep_max_inline_send) return false; if (xdr->page_len) { @@ -159,9 +166,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, static bool rpcrdma_results_inline(struct 
rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - - return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; + return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv; } /* The client is required to provide a Reply chunk if the maximum @@ -173,10 +178,9 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt, const struct rpc_rqst *rqst) { const struct xdr_buf *buf = &rqst->rq_rcv_buf; - const struct rpcrdma_ia *ia = &r_xprt->rx_ia; - return buf->head[0].iov_len + buf->tail[0].iov_len < - ia->ri_max_inline_read; + return (buf->head[0].iov_len + buf->tail[0].iov_len) < + r_xprt->rx_ep.rep_max_inline_recv; } /* Split @vec on page boundaries into SGEs. FMR registers pages, not diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6b7c84166d13..b37a3e0f6728 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -70,7 +70,7 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; -static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; +unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; int xprt_rdma_pad_optimize; @@ -350,8 +350,6 @@ xprt_setup_rdma(struct xprt_create *args) xprt_rdma_format_addresses(xprt, sap); cdata.max_requests = xprt_rdma_slot_table_entries; - cdata.inline_wsize = xprt_rdma_max_inline_write; - cdata.inline_rsize = xprt_rdma_max_inline_read; /* * Create new transport instance, which includes initialized diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 672993cee70d..9e24ca502430 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -188,7 +188,6 @@ static void rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, struct rdma_conn_param *param) { - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; const 
struct rpcrdma_connect_private *pmsg = param->private_data; unsigned int rsize, wsize; @@ -205,12 +204,13 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } - if (rsize < cdata->inline_rsize) - cdata->inline_rsize = rsize; - if (wsize < cdata->inline_wsize) - cdata->inline_wsize = wsize; - dprintk("RPC: %s: max send %u, max recv %u\n", - __func__, cdata->inline_wsize, cdata->inline_rsize); + if (rsize < r_xprt->rx_ep.rep_inline_recv) + r_xprt->rx_ep.rep_inline_recv = rsize; + if (wsize < r_xprt->rx_ep.rep_inline_send) + r_xprt->rx_ep.rep_inline_send = wsize; + dprintk("RPC: %s: max send %u, max recv %u\n", __func__, + r_xprt->rx_ep.rep_inline_send, + r_xprt->rx_ep.rep_inline_recv); rpcrdma_set_max_header_sizes(r_xprt); } @@ -488,6 +488,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, unsigned int max_sge; int rc; + ep->rep_inline_send = xprt_rdma_max_inline_write; + ep->rep_inline_recv = xprt_rdma_max_inline_read; + max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge, RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { @@ -550,8 +553,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, pmsg->cp_magic = rpcrdma_cmp_magic; pmsg->cp_version = RPCRDMA_CMP_VERSION; pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; - pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); - pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); + pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->rep_inline_send); + pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->rep_inline_recv); ep->rep_remote_cma.private_data = pmsg; ep->rep_remote_cma.private_data_len = sizeof(*pmsg); @@ -1045,7 +1048,6 @@ out1: static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) { - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; @@ -1053,7 +1055,7 @@ 
static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) if (rep == NULL) goto out; - rep->rr_rdmabuf = rpcrdma_regbuf_alloc(cdata->inline_rsize, + rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.rep_inline_recv, DMA_FROM_DEVICE, GFP_KERNEL); if (!rep->rr_rdmabuf) goto out_free; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index d34371d0d0f8..edf602afb08a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -71,8 +71,6 @@ struct rpcrdma_ia { int ri_async_rc; unsigned int ri_max_segs; unsigned int ri_max_frwr_depth; - unsigned int ri_max_inline_write; - unsigned int ri_max_inline_read; unsigned int ri_max_send_sges; bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; @@ -92,11 +90,15 @@ enum { struct rpcrdma_ep { unsigned int rep_send_count; unsigned int rep_send_batch; + unsigned int rep_max_inline_send; + unsigned int rep_max_inline_recv; int rep_connected; struct ib_qp_init_attr rep_attr; wait_queue_head_t rep_connect_wait; struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; + unsigned int rep_inline_send; /* negotiated */ + unsigned int rep_inline_recv; /* negotiated */ int rep_receive_count; }; @@ -419,8 +421,6 @@ enum { */ struct rpcrdma_create_data_internal { unsigned int max_requests; /* max requests (slots) in flight */ - unsigned int inline_rsize; /* max non-rdma read data payload */ - unsigned int inline_wsize; /* max non-rdma write data payload */ }; /* @@ -631,6 +631,7 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) /* RPC/RDMA module init - xprtrdma/transport.c */ extern unsigned int xprt_rdma_max_inline_read; +extern unsigned int xprt_rdma_max_inline_write; void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); void xprt_rdma_free_addresses(struct rpc_xprt *xprt); void xprt_rdma_close(struct rpc_xprt *xprt); -- cgit From 86c4ccd9b92ba6541fc4734e82f87139deea0470 Mon Sep 17 00:00:00 
2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:40:25 -0400 Subject: xprtrdma: Eliminate struct rpcrdma_create_data_internal Clean up. Move the remaining field in rpcrdma_create_data_internal so the structure can be removed. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 21 ++++++++---------- net/sunrpc/xprtrdma/transport.c | 27 ++++------------------- net/sunrpc/xprtrdma/verbs.c | 47 ++++++++++++++++++++++++----------------- net/sunrpc/xprtrdma/xprt_rdma.h | 22 +++++-------------- 4 files changed, 46 insertions(+), 71 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 7cd27184ecd1..1d369b65e845 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -194,12 +194,11 @@ out_list_err: * frwr_open - Prepare an endpoint for use with FRWR * @ia: interface adapter this endpoint will use * @ep: endpoint to prepare - * @cdata: transport parameters * * On success, sets: * ep->rep_attr.cap.max_send_wr * ep->rep_attr.cap.max_recv_wr - * cdata->max_requests + * ep->rep_max_requests * ia->ri_max_segs * * And these FRWR-related fields: @@ -208,8 +207,7 @@ out_list_err: * * On failure, a negative errno is returned. 
*/ -int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, - struct rpcrdma_create_data_internal *cdata) +int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) { struct ib_device_attr *attrs = &ia->ri_id->device->attrs; int max_qp_wr, depth, delta; @@ -258,19 +256,18 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, max_qp_wr -= 1; if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) return -ENOMEM; - if (cdata->max_requests > max_qp_wr) - cdata->max_requests = max_qp_wr; - ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth; + if (ep->rep_max_requests > max_qp_wr) + ep->rep_max_requests = max_qp_wr; + ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth; if (ep->rep_attr.cap.max_send_wr > max_qp_wr) { - cdata->max_requests = max_qp_wr / depth; - if (!cdata->max_requests) + ep->rep_max_requests = max_qp_wr / depth; + if (!ep->rep_max_requests) return -EINVAL; - ep->rep_attr.cap.max_send_wr = cdata->max_requests * - depth; + ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth; } ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ - ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index b37a3e0f6728..1f73a6a7e43c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -68,7 +68,7 @@ * tunables */ -static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; +unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; @@ -288,7 +288,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) 
cancel_delayed_work_sync(&r_xprt->rx_connect_worker); - rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); + rpcrdma_ep_destroy(r_xprt); rpcrdma_buffer_destroy(&r_xprt->rx_buf); rpcrdma_ia_close(&r_xprt->rx_ia); @@ -311,10 +311,8 @@ static const struct rpc_timeout xprt_rdma_default_timeout = { static struct rpc_xprt * xprt_setup_rdma(struct xprt_create *args) { - struct rpcrdma_create_data_internal cdata; struct rpc_xprt *xprt; struct rpcrdma_xprt *new_xprt; - struct rpcrdma_ep *new_ep; struct sockaddr *sap; int rc; @@ -349,29 +347,12 @@ xprt_setup_rdma(struct xprt_create *args) xprt_set_bound(xprt); xprt_rdma_format_addresses(xprt, sap); - cdata.max_requests = xprt_rdma_slot_table_entries; - - /* - * Create new transport instance, which includes initialized - * o ia - * o endpoint - * o buffers - */ - new_xprt = rpcx_to_rdmax(xprt); - rc = rpcrdma_ia_open(new_xprt); if (rc) goto out1; - /* - * initialize and create ep - */ - new_xprt->rx_data = cdata; - new_ep = &new_xprt->rx_ep; - - rc = rpcrdma_ep_create(&new_xprt->rx_ep, - &new_xprt->rx_ia, &new_xprt->rx_data); + rc = rpcrdma_ep_create(new_xprt); if (rc) goto out2; @@ -402,7 +383,7 @@ out4: rpcrdma_buffer_destroy(&new_xprt->rx_buf); rc = -ENODEV; out3: - rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); + rpcrdma_ep_destroy(new_xprt); out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 9e24ca502430..0d0c3356f34e 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -476,18 +476,22 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) ia->ri_pd = NULL; } -/* - * Create unconnected endpoint. +/** + * rpcrdma_ep_create - Create unconnected endpoint + * @r_xprt: transport to instantiate + * + * Returns zero on success, or a negative errno. 
*/ -int -rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata) +int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; struct ib_cq *sendcq, *recvcq; unsigned int max_sge; int rc; + ep->rep_max_requests = xprt_rdma_slot_table_entries; ep->rep_inline_send = xprt_rdma_max_inline_write; ep->rep_inline_recv = xprt_rdma_max_inline_read; @@ -499,7 +503,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } ia->ri_max_send_sges = max_sge; - rc = frwr_open(ia, ep, cdata); + rc = frwr_open(ia, ep); if (rc) return rc; @@ -521,7 +525,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_send_sge, ep->rep_attr.cap.max_recv_sge); - ep->rep_send_batch = cdata->max_requests >> 3; + ep->rep_send_batch = ep->rep_max_requests >> 3; ep->rep_send_count = ep->rep_send_batch; init_waitqueue_head(&ep->rep_connect_wait); ep->rep_receive_count = 0; @@ -584,16 +588,16 @@ out1: return rc; } -/* - * rpcrdma_ep_destroy +/** + * rpcrdma_ep_destroy - Disconnect and destroy endpoint. + * @r_xprt: transport instance to shut down * - * Disconnect and destroy endpoint. After this, the only - * valid operations on the ep are to free it (if dynamically - * allocated) or re-create it. 
*/ -void -rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + if (ia->ri_id && ia->ri_id->qp) { rpcrdma_ep_disconnect(ep, ia); rdma_destroy_qp(ia->ri_id); @@ -623,7 +627,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, goto out1; rc = -ENOMEM; - err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data); + err = rpcrdma_ep_create(r_xprt); if (err) { pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err); goto out2; @@ -640,7 +644,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, return 0; out3: - rpcrdma_ep_destroy(ep, ia); + rpcrdma_ep_destroy(r_xprt); out2: rpcrdma_ia_close(ia); out1: @@ -1082,14 +1086,19 @@ out: return false; } -int -rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) +/** + * rpcrdma_buffer_create - Create initial set of req/rep objects + * @r_xprt: transport instance to (re)initialize + * + * Returns zero on success, otherwise a negative errno. 
+ */ +int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; int i, rc; buf->rb_flags = 0; - buf->rb_max_requests = r_xprt->rx_data.max_requests; + buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests; buf->rb_bc_srv_max_requests = 0; spin_lock_init(&buf->rb_mrlock); spin_lock_init(&buf->rb_lock); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index edf602afb08a..9e98ee0cd937 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -97,6 +97,7 @@ struct rpcrdma_ep { wait_queue_head_t rep_connect_wait; struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; + unsigned int rep_max_requests; /* set by /proc */ unsigned int rep_inline_send; /* negotiated */ unsigned int rep_inline_recv; /* negotiated */ int rep_receive_count; @@ -413,16 +414,6 @@ enum { RPCRDMA_BUF_F_EMPTY_SCQ = 0, }; -/* - * Internal structure for transport instance creation. This - * exists primarily for modularity. 
- * - * This data should be set with mount options - */ -struct rpcrdma_create_data_internal { - unsigned int max_requests; /* max requests (slots) in flight */ -}; - /* * Statistics for RPCRDMA */ @@ -467,13 +458,11 @@ struct rpcrdma_xprt { struct rpcrdma_ia rx_ia; struct rpcrdma_ep rx_ep; struct rpcrdma_buffer rx_buf; - struct rpcrdma_create_data_internal rx_data; struct delayed_work rx_connect_worker; struct rpcrdma_stats rx_stats; }; #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) -#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) static inline const char * rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt) @@ -507,9 +496,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); /* * Endpoint calls - xprtrdma/verbs.c */ -int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, - struct rpcrdma_create_data_internal *); -void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt); +void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt); int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); @@ -583,8 +571,7 @@ rpcrdma_data_dir(bool writing) /* Memory registration calls xprtrdma/frwr_ops.c */ bool frwr_is_supported(struct ib_device *device); -int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, - struct rpcrdma_create_data_internal *cdata); +int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep); int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); void frwr_release_mr(struct rpcrdma_mr *mr); size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); @@ -630,6 +617,7 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) /* RPC/RDMA module init - xprtrdma/transport.c */ +extern unsigned int xprt_rdma_slot_table_entries; extern unsigned int xprt_rdma_max_inline_read; extern unsigned int xprt_rdma_max_inline_write; void xprt_rdma_format_addresses(struct rpc_xprt *xprt, 
struct sockaddr *sap); -- cgit From 5f2311f5bd359d6d810922bf25c238053a449f2a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:40:31 -0400 Subject: xprtrdma: Remove pr_err() call sites from completion handlers Clean up: rely on the trace points instead. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 23 ++++------------------- net/sunrpc/xprtrdma/verbs.c | 9 --------- 2 files changed, 4 insertions(+), 28 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 1d369b65e845..794ba4ca0994 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -297,15 +297,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt) (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth); } -static void -__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) -{ - if (wc->status != IB_WC_WR_FLUSH_ERR) - pr_err("rpcrdma: %s: %s (%u/0x%x)\n", - wr, ib_wc_status_msg(wc->status), - wc->status, wc->vendor_err); -} - /** * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC * @cq: completion queue (ignored) @@ -320,10 +311,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) container_of(cqe, struct rpcrdma_frwr, fr_cqe); /* WARNING: Only wr_cqe and status are reliable at this point */ - if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_SUCCESS) frwr->fr_state = FRWR_FLUSHED_FR; - __frwr_sendcompletion_flush(wc, "fastreg"); - } trace_xprtrdma_wc_fastreg(wc, frwr); } @@ -341,10 +330,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) fr_cqe); /* WARNING: Only wr_cqe and status are reliable at this point */ - if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_SUCCESS) frwr->fr_state = FRWR_FLUSHED_LI; - __frwr_sendcompletion_flush(wc, "localinv"); - } trace_xprtrdma_wc_li(wc, frwr); } @@ -363,12 +350,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) fr_cqe); /* WARNING: Only wr_cqe and 
status are reliable at this point */ - if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_SUCCESS) frwr->fr_state = FRWR_FLUSHED_LI; - __frwr_sendcompletion_flush(wc, "localinv"); - } - complete(&frwr->fr_linv_done); trace_xprtrdma_wc_li_wake(wc, frwr); + complete(&frwr->fr_linv_done); } /** diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 0d0c3356f34e..fcbcd4afaa5a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -135,11 +135,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_send(sc, wc); - if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) - pr_err("rpcrdma: Send: %s (%u/0x%x)\n", - ib_wc_status_msg(wc->status), - wc->status, wc->vendor_err); - rpcrdma_sendctx_put_locked(sc); } @@ -177,10 +172,6 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; out_flushed: - if (wc->status != IB_WC_WR_FLUSH_ERR) - pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", - ib_wc_status_msg(wc->status), - wc->status, wc->vendor_err); rpcrdma_recv_buffer_put(rep); } -- cgit From b8fe677fd059deadb2f7f71c4dea747be84d75e0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:40:36 -0400 Subject: xprtrdma: Update comments that reference ib_drain_qp Commit e1ede312f17e ("xprtrdma: Fix helper that drains the transport") replaced the ib_drain_qp() call, so update documenting comments to reflect current operation. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fcbcd4afaa5a..bef5eac8ab38 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -83,7 +83,9 @@ static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); -/* Wait for outstanding transport work to finish. +/* Wait for outstanding transport work to finish. ib_drain_qp + * handles the drains in the wrong order for us, so open code + * them here. */ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { @@ -792,8 +794,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) */ /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced - * queue activity, and ib_drain_qp has flushed all remaining Send - * requests. + * queue activity, and rpcrdma_xprt_drain has flushed all remaining + * Send requests. */ static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf) { @@ -1194,7 +1196,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) * rpcrdma_buffer_destroy - Release all hw resources * @buf: root control block for resources * - * ORDERING: relies on a prior ib_drain_qp : + * ORDERING: relies on a prior rpcrdma_xprt_drain : * - No more Send or Receive completions can occur * - All MRs, reps, and reqs are returned to their free lists */ -- cgit From 2cfd11f16f01c0ee8f83bb07027c9d2f43565473 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Apr 2019 09:40:41 -0400 Subject: xprtrdma: Remove stale comment The comment hasn't been accurate for several years. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/xprt_rdma.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 9e98ee0cd937..d1e0749bcbc4 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -239,13 +239,6 @@ struct rpcrdma_sendctx { * * An external memory region is any buffer or page that is registered * on the fly (ie, not pre-registered). - * - * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During - * call_allocate, rpcrdma_buffer_get() assigns one to each segment in - * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep - * track of registration metadata while each RPC is pending. - * rpcrdma_deregister_external() uses this metadata to unmap and - * release these resources when an RPC is complete. */ enum rpcrdma_frwr_state { FRWR_IS_INVALID, /* ready to be used */ -- cgit From 79caa5fad47c69874f9efc4ac3128cc3f6d36f6e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Apr 2019 17:46:42 -0400 Subject: SUNRPC: Cache cred of process creating the rpc_client When converting kuids to AUTH_UNIX creds, etc we will want to use the same user namespace as the process that created the rpc client. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 7 +++++++ net/sunrpc/rpcb_clnt.c | 9 +++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e933f1185317..369a2648dafc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -394,6 +394,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (err) goto out_no_clid; + clnt->cl_cred = get_cred(args->cred); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_prog = args->prognumber ? 
: program->number; @@ -439,6 +440,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, out_no_path: rpc_free_iostats(clnt->cl_metrics); out_no_stats: + put_cred(clnt->cl_cred); rpc_free_clid(clnt); out_no_clid: kfree(clnt); @@ -631,6 +633,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; new->cl_principal = clnt->cl_principal; + new->cl_cred = get_cred(clnt->cl_cred); return new; out_err: @@ -652,6 +655,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt) .prognumber = clnt->cl_prog, .version = clnt->cl_vers, .authflavor = clnt->cl_auth->au_flavor, + .cred = clnt->cl_cred, }; return __rpc_clone_client(&args, clnt); } @@ -673,6 +677,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) .prognumber = clnt->cl_prog, .version = clnt->cl_vers, .authflavor = flavor, + .cred = clnt->cl_cred, }; return __rpc_clone_client(&args, clnt); } @@ -880,6 +885,7 @@ rpc_free_client(struct rpc_clnt *clnt) xprt_put(rcu_dereference_raw(clnt->cl_xprt)); xprt_iter_destroy(&clnt->cl_xpi); rpciod_down(); + put_cred(clnt->cl_cred); rpc_free_clid(clnt); kfree(clnt); return parent; @@ -944,6 +950,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .prognumber = program->number, .version = vers, .authflavor = old->cl_auth->au_flavor, + .cred = old->cl_cred, }; struct rpc_clnt *clnt; int err; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 18b0cf2a923f..2277b7cdad27 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -240,6 +240,7 @@ static int rpcb_create_local_unix(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_NULL, + .cred = current_cred(), /* * We turn off the idle timeout to prevent the kernel * from automatically disconnecting the socket. 
@@ -299,6 +300,7 @@ static int rpcb_create_local_net(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_UNIX, + .cred = current_cred(), .flags = RPC_CLNT_CREATE_NOPING, }; struct rpc_clnt *clnt, *clnt4; @@ -358,7 +360,8 @@ out: static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename, const char *hostname, struct sockaddr *srvaddr, size_t salen, - int proto, u32 version) + int proto, u32 version, + const struct cred *cred) { struct rpc_create_args args = { .net = net, @@ -370,6 +373,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, + .cred = cred, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_NONPRIVPORT), }; @@ -745,7 +749,8 @@ void rpcb_getport_async(struct rpc_task *task) rpcb_clnt = rpcb_create(xprt->xprt_net, clnt->cl_nodename, xprt->servername, sap, salen, - xprt->prot, bind_version); + xprt->prot, bind_version, + clnt->cl_cred); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", -- cgit From 283ebe3ec4157c5cdc2581ed7e5c3764137f8fe5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Apr 2019 17:46:44 -0400 Subject: SUNRPC: Use the client user namespace when encoding creds When encoding AUTH_UNIX creds and AUTH_GSS upcalls, use the user namespace of the process that created the rpc client. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/auth_gss.c | 11 ++++++++--- net/sunrpc/auth_unix.c | 9 +++++---- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index c055edfec55e..5dc2aef2232a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -412,7 +412,10 @@ gss_upcall_callback(struct rpc_task *task) static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) { - uid_t uid = from_kuid(&init_user_ns, gss_msg->uid); + struct user_namespace *userns = gss_msg->auth->client->cl_cred ? + gss_msg->auth->client->cl_cred->user_ns : &init_user_ns; + + uid_t uid = from_kuid_munged(userns, gss_msg->uid); memcpy(gss_msg->databuf, &uid, sizeof(uid)); gss_msg->msg.data = gss_msg->databuf; gss_msg->msg.len = sizeof(uid); @@ -424,13 +427,15 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, const char *service_name, const char *target_name) { + struct user_namespace *userns = gss_msg->auth->client->cl_cred ? 
+ gss_msg->auth->client->cl_cred->user_ns : &init_user_ns; struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; size_t buflen = sizeof(gss_msg->databuf); int len; len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name, - from_kuid(&init_user_ns, gss_msg->uid)); + from_kuid_munged(userns, gss_msg->uid)); buflen -= len; p += len; gss_msg->msg.len = len; @@ -706,7 +711,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) goto err; } - uid = make_kuid(&init_user_ns, id); + uid = make_kuid(current_user_ns(), id); if (!uid_valid(uid)) { err = -EINVAL; goto err; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index d4018e5a24c5..e7df1f782b2e 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -107,6 +107,8 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr) __be32 *p, *cred_len, *gidarr_len; int i; struct group_info *gi = cred->cr_cred->group_info; + struct user_namespace *userns = clnt->cl_cred ? + clnt->cl_cred->user_ns : &init_user_ns; /* Credential */ @@ -122,14 +124,13 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr) p = xdr_reserve_space(xdr, 3 * sizeof(*p)); if (!p) goto marshal_failed; - *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid)); - *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid)); + *p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid)); + *p++ = cpu_to_be32(from_kgid_munged(userns, cred->cr_cred->fsgid)); gidarr_len = p++; if (gi) for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++) - *p++ = cpu_to_be32(from_kgid(&init_user_ns, - gi->gid[i])); + *p++ = cpu_to_be32(from_kgid_munged(userns, gi->gid[i])); *gidarr_len = cpu_to_be32(p - gidarr_len - 1); *cred_len = cpu_to_be32((p - cred_len - 1) << 2); p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2); -- cgit From ac83228a7101e655ba5a7fa61ae10b058ada15db Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Apr 2019 17:46:45 -0400 Subject: SUNRPC: 
Use namespace of listening daemon in the client AUTH_GSS upcall When the client needs to talk to rpc.gssd, we should ensure that the uid argument is encoded to match the user namespace of the daemon. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/auth_gss.c | 60 +++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 16 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5dc2aef2232a..b2cbc83d39c7 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -269,6 +269,7 @@ err: struct gss_upcall_msg { refcount_t count; kuid_t uid; + const char *service_name; struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; @@ -316,6 +317,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) gss_put_ctx(gss_msg->ctx); rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); gss_put_auth(gss_msg->auth); + kfree_const(gss_msg->service_name); kfree(gss_msg); } @@ -410,10 +412,10 @@ gss_upcall_callback(struct rpc_task *task) gss_release_msg(gss_msg); } -static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) +static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg, + const struct cred *cred) { - struct user_namespace *userns = gss_msg->auth->client->cl_cred ? 
- gss_msg->auth->client->cl_cred->user_ns : &init_user_ns; + struct user_namespace *userns = cred->user_ns; uid_t uid = from_kuid_munged(userns, gss_msg->uid); memcpy(gss_msg->databuf, &uid, sizeof(uid)); @@ -423,12 +425,24 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf)); } +static ssize_t +gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg, + char __user *buf, size_t buflen) +{ + struct gss_upcall_msg *gss_msg = container_of(msg, + struct gss_upcall_msg, + msg); + if (msg->copied == 0) + gss_encode_v0_msg(gss_msg, file->f_cred); + return rpc_pipe_generic_upcall(file, msg, buf, buflen); +} + static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, const char *service_name, - const char *target_name) + const char *target_name, + const struct cred *cred) { - struct user_namespace *userns = gss_msg->auth->client->cl_cred ? - gss_msg->auth->client->cl_cred->user_ns : &init_user_ns; + struct user_namespace *userns = cred->user_ns; struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; size_t buflen = sizeof(gss_msg->databuf); @@ -496,6 +510,25 @@ out_overflow: return -ENOMEM; } +static ssize_t +gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg, + char __user *buf, size_t buflen) +{ + struct gss_upcall_msg *gss_msg = container_of(msg, + struct gss_upcall_msg, + msg); + int err; + if (msg->copied == 0) { + err = gss_encode_v1_msg(gss_msg, + gss_msg->service_name, + gss_msg->auth->target_name, + file->f_cred); + if (err) + return err; + } + return rpc_pipe_generic_upcall(file, msg, buf, buflen); +} + static struct gss_upcall_msg * gss_alloc_msg(struct gss_auth *gss_auth, kuid_t uid, const char *service_name) @@ -518,16 +551,11 @@ gss_alloc_msg(struct gss_auth *gss_auth, refcount_set(&gss_msg->count, 1); gss_msg->uid = uid; gss_msg->auth = gss_auth; - switch (vers) { - case 0: - gss_encode_v0_msg(gss_msg); - break; - default: - err = gss_encode_v1_msg(gss_msg, 
service_name, gss_auth->target_name); - if (err) + if (service_name) { + gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS); + if (!gss_msg->service_name) goto err_put_pipe_version; } - kref_get(&gss_auth->kref); return gss_msg; err_put_pipe_version: put_pipe_version(gss_auth->net); @@ -2120,7 +2148,7 @@ static const struct rpc_credops gss_nullops = { }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { - .upcall = rpc_pipe_generic_upcall, + .upcall = gss_v0_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v0, @@ -2128,7 +2156,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = { }; static const struct rpc_pipe_ops gss_upcall_ops_v1 = { - .upcall = rpc_pipe_generic_upcall, + .upcall = gss_v1_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v1, -- cgit From 9c5948c248696ca60c56ec5a608e225c4ab8a854 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Mon, 29 Apr 2019 17:32:31 +0800 Subject: SUNRPC: task should be exit if encode return EKEYEXPIRED more times If rpc.gssd always returns cred success, but the cred has now expired, then the task will loop between call_refresh and call_transmit. Exit the rpc task after retrying. 
Signed-off-by: ZhangXiaoxu Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 369a2648dafc..c1f1afabd024 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1814,7 +1814,14 @@ call_encode(struct rpc_task *task) rpc_delay(task, HZ >> 4); break; case -EKEYEXPIRED: - task->tk_action = call_refresh; + if (!task->tk_cred_retry) { + rpc_exit(task, task->tk_status); + } else { + task->tk_action = call_refresh; + task->tk_cred_retry--; + dprintk("RPC: %5u %s: retry refresh creds\n", + task->tk_pid, __func__); + } break; default: rpc_call_rpcerror(task, task->tk_status); -- cgit From fe31ce83cbac7adcaa629b59179f502981be5f8b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 May 2019 15:30:09 +0300 Subject: SUNRPC: Fix an error code in gss_alloc_msg() If kstrdup_const() fails then this function returns zero (success) but it should return -ENOMEM. 
Fixes: ac83228a7101 ("SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall") Signed-off-by: Dan Carpenter Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/auth_gss.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index b2cbc83d39c7..06fe17c2aea1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -553,8 +553,10 @@ gss_alloc_msg(struct gss_auth *gss_auth, gss_msg->auth = gss_auth; if (service_name) { gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS); - if (!gss_msg->service_name) + if (!gss_msg->service_name) { + err = -ENOMEM; goto err_put_pipe_version; + } } return gss_msg; err_put_pipe_version: -- cgit From 5940d1cf9f42f67e9cc3f7df9eda39f5888d6e9e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 9 May 2019 11:00:07 -0400 Subject: SUNRPC: Rebalance a kref in auth_gss.c Restore the kref_get that matches the gss_put_auth(gss_msg->auth) done by gss_release_msg(). Fixes: ac83228a7101 ("SUNRPC: Use namespace of listening daemon ...") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/auth_gss.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 06fe17c2aea1..4ce42c62458e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -551,6 +551,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, refcount_set(&gss_msg->count, 1); gss_msg->uid = uid; gss_msg->auth = gss_auth; + kref_get(&gss_auth->kref); if (service_name) { gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS); if (!gss_msg->service_name) { -- cgit