diff options
Diffstat (limited to 'net/sunrpc/svc.c')
| -rw-r--r-- | net/sunrpc/svc.c | 863 |
1 files changed, 469 insertions, 394 deletions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index f06622814a95..4704dce7284e 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -32,6 +32,7 @@ #include <trace/events/sunrpc.h> #include "fail.h" +#include "sunrpc.h" #define RPCDBG_FACILITY RPCDBG_SVCDSP @@ -72,57 +73,100 @@ static struct svc_pool_map svc_pool_map = { static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ static int -param_set_pool_mode(const char *val, const struct kernel_param *kp) +__param_set_pool_mode(const char *val, struct svc_pool_map *m) { - int *ip = (int *)kp->arg; - struct svc_pool_map *m = &svc_pool_map; - int err; + int err, mode; mutex_lock(&svc_pool_map_mutex); - err = -EBUSY; - if (m->count) - goto out; - err = 0; if (!strncmp(val, "auto", 4)) - *ip = SVC_POOL_AUTO; + mode = SVC_POOL_AUTO; else if (!strncmp(val, "global", 6)) - *ip = SVC_POOL_GLOBAL; + mode = SVC_POOL_GLOBAL; else if (!strncmp(val, "percpu", 6)) - *ip = SVC_POOL_PERCPU; + mode = SVC_POOL_PERCPU; else if (!strncmp(val, "pernode", 7)) - *ip = SVC_POOL_PERNODE; + mode = SVC_POOL_PERNODE; else err = -EINVAL; + if (err) + goto out; + + if (m->count == 0) + m->mode = mode; + else if (mode != m->mode) + err = -EBUSY; out: mutex_unlock(&svc_pool_map_mutex); return err; } static int -param_get_pool_mode(char *buf, const struct kernel_param *kp) +param_set_pool_mode(const char *val, const struct kernel_param *kp) +{ + struct svc_pool_map *m = kp->arg; + + return __param_set_pool_mode(val, m); +} + +int sunrpc_set_pool_mode(const char *val) +{ + return __param_set_pool_mode(val, &svc_pool_map); +} +EXPORT_SYMBOL(sunrpc_set_pool_mode); + +/** + * sunrpc_get_pool_mode - get the current pool_mode for the host + * @buf: where to write the current pool_mode + * @size: size of @buf + * + * Grab the current pool_mode from the svc_pool_map and write + * the resulting string to @buf. Returns the number of characters + * written to @buf (a'la snprintf()). + */ +int +sunrpc_get_pool_mode(char *buf, size_t size) { - int *ip = (int *)kp->arg; + struct svc_pool_map *m = &svc_pool_map; - switch (*ip) + switch (m->mode) { case SVC_POOL_AUTO: - return strlcpy(buf, "auto\n", 20); + return snprintf(buf, size, "auto"); case SVC_POOL_GLOBAL: - return strlcpy(buf, "global\n", 20); + return snprintf(buf, size, "global"); case SVC_POOL_PERCPU: - return strlcpy(buf, "percpu\n", 20); + return snprintf(buf, size, "percpu"); case SVC_POOL_PERNODE: - return strlcpy(buf, "pernode\n", 20); + return snprintf(buf, size, "pernode"); default: - return sprintf(buf, "%d\n", *ip); + return snprintf(buf, size, "%d", m->mode); } } +EXPORT_SYMBOL(sunrpc_get_pool_mode); + +static int +param_get_pool_mode(char *buf, const struct kernel_param *kp) +{ + char str[16]; + int len; + + len = sunrpc_get_pool_mode(str, ARRAY_SIZE(str)); + + /* Ensure we have room for newline and NUL */ + len = min_t(int, len, ARRAY_SIZE(str) - 2); + + /* tack on the newline */ + str[len] = '\n'; + str[len + 1] = '\0'; + + return sysfs_emit(buf, "%s", str); +} module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode, - &svc_pool_map.mode, 0644); + &svc_pool_map, 0644); /* * Detect best pool mapping mode heuristically, @@ -250,10 +294,8 @@ svc_pool_map_get(void) int npools = -1; mutex_lock(&svc_pool_map_mutex); - if (m->count++) { mutex_unlock(&svc_pool_map_mutex); - WARN_ON_ONCE(m->npools <= 1); return m->npools; } @@ -275,32 +317,21 @@ svc_pool_map_get(void) m->mode = SVC_POOL_GLOBAL; } m->npools = npools; - - if (npools == 1) - /* service is unpooled, so doesn't hold a reference */ - m->count--; - mutex_unlock(&svc_pool_map_mutex); return npools; } /* - * Drop a reference to the global map of cpus to pools, if - * pools were in use, i.e. if npools > 1. + * Drop a reference to the global map of cpus to pools. * When the last reference is dropped, the map data is - * freed; this allows the sysadmin to change the pool - * mode using the pool_mode module option without - * rebooting or re-loading sunrpc.ko. + * freed; this allows the sysadmin to change the pool. */ static void -svc_pool_map_put(int npools) +svc_pool_map_put(void) { struct svc_pool_map *m = &svc_pool_map; - if (npools <= 1) - return; mutex_lock(&svc_pool_map_mutex); - if (!--m->count) { kfree(m->to_pool); m->to_pool = NULL; @@ -308,7 +339,6 @@ svc_pool_map_put(int npools) m->pool_to = NULL; m->npools = 0; } - mutex_unlock(&svc_pool_map_mutex); } @@ -322,7 +352,7 @@ static int svc_pool_map_get_node(unsigned int pidx) if (m->mode == SVC_POOL_PERNODE) return m->pool_to[pidx]; } - return NUMA_NO_NODE; + return numa_mem_id(); } /* * Set the given thread's cpus_allowed mask so that it @@ -388,7 +418,7 @@ struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv) return &serv->sv_pools[pidx % serv->sv_nrpools]; } -int svc_rpcb_setup(struct svc_serv *serv, struct net *net) +static int svc_rpcb_setup(struct svc_serv *serv, struct net *net) { int err; @@ -400,21 +430,20 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net) svc_unregister(serv, net); return 0; } -EXPORT_SYMBOL_GPL(svc_rpcb_setup); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net) { svc_unregister(serv, net); rpcb_put_local(net); } -EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); static int svc_uses_rpcbind(struct svc_serv *serv) { - struct svc_program *progp; - unsigned int i; + unsigned int p, i; + + for (p = 0; p < serv->sv_nprogs; p++) { + struct svc_program *progp = &serv->sv_programs[p]; - for (progp = serv->sv_program; progp; progp = progp->pg_next) { for (i = 0; i < progp->pg_nvers; i++) { if (progp->pg_vers[i] == NULL) continue; @@ -438,9 +467,7 @@ EXPORT_SYMBOL_GPL(svc_bind); static void __svc_init_bc(struct svc_serv *serv) { - INIT_LIST_HEAD(&serv->sv_cb_list); - spin_lock_init(&serv->sv_cb_lock); - init_waitqueue_head(&serv->sv_cb_waitq); + lwq_init(&serv->sv_cb_list); } #else static void @@ -453,8 +480,8 @@ __svc_init_bc(struct svc_serv *serv) * Create an RPC service */ static struct svc_serv * -__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - int (*threadfn)(void *data)) +__svc_create(struct svc_program *prog, int nprogs, struct svc_stat *stats, + unsigned int bufsize, int npools, int (*threadfn)(void *data)) { struct svc_serv *serv; unsigned int vers; @@ -464,26 +491,27 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; serv->sv_name = prog->pg_name; - serv->sv_program = prog; - kref_init(&serv->sv_refcnt); - serv->sv_stats = prog->pg_stats; + serv->sv_programs = prog; + serv->sv_nprogs = nprogs; + serv->sv_stats = stats; if (bufsize > RPCSVC_MAXPAYLOAD) bufsize = RPCSVC_MAXPAYLOAD; serv->sv_max_payload = bufsize? bufsize : 4096; serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); serv->sv_threadfn = threadfn; xdrsize = 0; - while (prog) { - prog->pg_lovers = prog->pg_nvers-1; - for (vers=0; vers<prog->pg_nvers ; vers++) - if (prog->pg_vers[vers]) { - prog->pg_hivers = vers; - if (prog->pg_lovers > vers) - prog->pg_lovers = vers; - if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) - xdrsize = prog->pg_vers[vers]->vs_xdrsize; + for (i = 0; i < nprogs; i++) { + struct svc_program *progp = &prog[i]; + + progp->pg_lovers = progp->pg_nvers-1; + for (vers = 0; vers < progp->pg_nvers ; vers++) + if (progp->pg_vers[vers]) { + progp->pg_hivers = vers; + if (progp->pg_lovers > vers) + progp->pg_lovers = vers; + if (progp->pg_vers[vers]->vs_xdrsize > xdrsize) + xdrsize = progp->pg_vers[vers]->vs_xdrsize; } - prog = prog->pg_next; } serv->sv_xdrsize = xdrsize; INIT_LIST_HEAD(&serv->sv_tempsocks); @@ -509,9 +537,13 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, i, serv->sv_name); pool->sp_id = i; - INIT_LIST_HEAD(&pool->sp_sockets); + lwq_init(&pool->sp_xprts); INIT_LIST_HEAD(&pool->sp_all_threads); - spin_lock_init(&pool->sp_lock); + init_llist_head(&pool->sp_idle_threads); + + percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); + percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); + percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); } return serv; @@ -528,31 +560,36 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize, int (*threadfn)(void *data)) { - return __svc_create(prog, bufsize, 1, threadfn); + return __svc_create(prog, 1, NULL, bufsize, 1, threadfn); } EXPORT_SYMBOL_GPL(svc_create); /** * svc_create_pooled - Create an RPC service with pooled threads - * @prog: the RPC program the new service will handle + * @prog: Array of RPC programs the new service will handle + * @nprogs: Number of programs in the array + * @stats: the stats struct if desired * @bufsize: maximum message size for @prog * @threadfn: a function to service RPC requests for @prog * * Returns an instantiated struct svc_serv object or NULL. */ struct svc_serv *svc_create_pooled(struct svc_program *prog, + unsigned int nprogs, + struct svc_stat *stats, unsigned int bufsize, int (*threadfn)(void *data)) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, threadfn); + serv = __svc_create(prog, nprogs, stats, bufsize, npools, threadfn); if (!serv) goto out_err; + serv->sv_is_pooled = true; return serv; out_err: - svc_pool_map_put(npools); + svc_pool_map_put(); return NULL; } EXPORT_SYMBOL_GPL(svc_create_pooled); @@ -562,57 +599,54 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); * protect sv_permsocks and sv_tempsocks. */ void -svc_destroy(struct kref *ref) +svc_destroy(struct svc_serv **servp) { - struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt); + struct svc_serv *serv = *servp; + unsigned int i; - dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name); + *servp = NULL; + + dprintk("svc: svc_destroy(%s)\n", serv->sv_programs->pg_name); timer_shutdown_sync(&serv->sv_temptimer); /* - * The last user is gone and thus all sockets have to be destroyed to - * the point. Check this. + * Remaining transports at this point are not expected. */ - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); + WARN_ONCE(!list_empty(&serv->sv_permsocks), + "SVC: permsocks remain for %s\n", serv->sv_programs->pg_name); + WARN_ONCE(!list_empty(&serv->sv_tempsocks), + "SVC: tempsocks remain for %s\n", serv->sv_programs->pg_name); cache_clean_deferred(serv); - svc_pool_map_put(serv->sv_nrpools); + if (serv->sv_is_pooled) + svc_pool_map_put(); + + for (i = 0; i < serv->sv_nrpools; i++) { + struct svc_pool *pool = &serv->sv_pools[i]; + percpu_counter_destroy(&pool->sp_messages_arrived); + percpu_counter_destroy(&pool->sp_sockets_queued); + percpu_counter_destroy(&pool->sp_threads_woken); + } kfree(serv->sv_pools); kfree(serv); } EXPORT_SYMBOL_GPL(svc_destroy); -/* - * Allocate an RPC server's buffer space. - * We allocate pages and place them in rq_pages. - */ -static int -svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) +static bool +svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) { - unsigned int pages, arghi; - - /* bc_xprt uses fore channel allocated buffers */ - if (svc_is_backchannel(rqstp)) - return 1; - - pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. - * We assume one is at most one page - */ - arghi = 0; - WARN_ON_ONCE(pages > RPCSVC_MAXPAGES); - if (pages > RPCSVC_MAXPAGES) - pages = RPCSVC_MAXPAGES; - while (pages) { - struct page *p = alloc_pages_node(node, GFP_KERNEL, 0); - if (!p) - break; - rqstp->rq_pages[arghi++] = p; - pages--; - } - return pages == 0; + rqstp->rq_maxpages = svc_serv_maxpages(serv); + + /* rq_pages' last entry is NULL for historical reasons. */ + rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages + 1, + sizeof(struct page *), + GFP_KERNEL, node); + if (!rqstp->rq_pages) + return false; + + return true; } /* @@ -621,15 +655,30 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) static void svc_release_buffer(struct svc_rqst *rqstp) { - unsigned int i; + unsigned long i; - for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++) + for (i = 0; i < rqstp->rq_maxpages; i++) if (rqstp->rq_pages[i]) put_page(rqstp->rq_pages[i]); + kfree(rqstp->rq_pages); } -struct svc_rqst * -svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) +static void +svc_rqst_free(struct svc_rqst *rqstp) +{ + folio_batch_release(&rqstp->rq_fbatch); + kfree(rqstp->rq_bvec); + svc_release_buffer(rqstp); + if (rqstp->rq_scratch_folio) + folio_put(rqstp->rq_scratch_folio); + kfree(rqstp->rq_resp); + kfree(rqstp->rq_argp); + kfree(rqstp->rq_auth_data); + kfree_rcu(rqstp, rq_rcu_head); +} + +static struct svc_rqst * +svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; @@ -637,12 +686,13 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return rqstp; - __set_bit(RQ_BUSY, &rqstp->rq_flags); + folio_batch_init(&rqstp->rq_fbatch); + rqstp->rq_server = serv; rqstp->rq_pool = pool; - rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0); - if (!rqstp->rq_scratch_page) + rqstp->rq_scratch_folio = __folio_alloc_node(GFP_KERNEL, 0, node); + if (!rqstp->rq_scratch_folio) goto out_enomem; rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); @@ -653,91 +703,97 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp->rq_resp) goto out_enomem; - if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) + if (!svc_init_buffer(rqstp, serv, node)) goto out_enomem; - return rqstp; -out_enomem: - svc_rqst_free(rqstp); - return NULL; -} -EXPORT_SYMBOL_GPL(svc_rqst_alloc); - -static struct svc_rqst * -svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) -{ - struct svc_rqst *rqstp; + rqstp->rq_bvec = kcalloc_node(rqstp->rq_maxpages, + sizeof(struct bio_vec), + GFP_KERNEL, node); + if (!rqstp->rq_bvec) + goto out_enomem; - rqstp = svc_rqst_alloc(serv, pool, node); - if (!rqstp) - return ERR_PTR(-ENOMEM); + rqstp->rq_err = -EAGAIN; /* No error yet */ - svc_get(serv); - spin_lock_bh(&serv->sv_lock); serv->sv_nrthreads += 1; - spin_unlock_bh(&serv->sv_lock); + pool->sp_nrthreads += 1; - spin_lock_bh(&pool->sp_lock); - pool->sp_nrthreads++; + /* Protected by whatever lock the service uses when calling + * svc_set_num_threads() + */ list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); - spin_unlock_bh(&pool->sp_lock); + return rqstp; + +out_enomem: + svc_rqst_free(rqstp); + return NULL; } -/* - * Choose a pool in which to create a new thread, for svc_set_num_threads +/** + * svc_pool_wake_idle_thread - Awaken an idle thread in @pool + * @pool: service thread pool + * + * Can be called from soft IRQ or process context. Finding an idle + * service thread and marking it BUSY is atomic with respect to + * other calls to svc_pool_wake_idle_thread(). + * */ -static inline struct svc_pool * -choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +void svc_pool_wake_idle_thread(struct svc_pool *pool) { - if (pool != NULL) - return pool; + struct svc_rqst *rqstp; + struct llist_node *ln; + + rcu_read_lock(); + ln = READ_ONCE(pool->sp_idle_threads.first); + if (ln) { + rqstp = llist_entry(ln, struct svc_rqst, rq_idle); + WRITE_ONCE(rqstp->rq_qtime, ktime_get()); + if (!task_is_running(rqstp->rq_task)) { + wake_up_process(rqstp->rq_task); + trace_svc_pool_thread_wake(pool, rqstp->rq_task->pid); + percpu_counter_inc(&pool->sp_threads_woken); + } else { + trace_svc_pool_thread_running(pool, rqstp->rq_task->pid); + } + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + trace_svc_pool_thread_noidle(pool, 0); +} +EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread); - return &serv->sv_pools[(*state)++ % serv->sv_nrpools]; +static struct svc_pool * +svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +{ + return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools]; } -/* - * Choose a thread to kill, for svc_set_num_threads - */ -static inline struct task_struct * -choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +static struct svc_pool * +svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool, + unsigned int *state) { + struct svc_pool *pool; unsigned int i; - struct task_struct *task = NULL; - if (pool != NULL) { - spin_lock_bh(&pool->sp_lock); - } else { - /* choose a pool in round-robin fashion */ + pool = target_pool; + + if (!pool) { for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; - spin_lock_bh(&pool->sp_lock); - if (!list_empty(&pool->sp_all_threads)) - goto found_pool; - spin_unlock_bh(&pool->sp_lock); + if (pool->sp_nrthreads) + break; } - return NULL; } -found_pool: - if (!list_empty(&pool->sp_all_threads)) { - struct svc_rqst *rqstp; - - /* - * Remove from the pool->sp_all_threads list - * so we don't try to kill it again. - */ - rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); - set_bit(RQ_VICTIM, &rqstp->rq_flags); - list_del_rcu(&rqstp->rq_all); - task = rqstp->rq_task; + if (pool && pool->sp_nrthreads) { + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); + set_bit(SP_NEED_VICTIM, &pool->sp_flags); + return pool; } - spin_unlock_bh(&pool->sp_lock); - - return task; + return NULL; } -/* create new threads */ static int svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -746,16 +802,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) struct svc_pool *chosen_pool; unsigned int state = serv->sv_nrthreads-1; int node; + int err; do { nrservs--; - chosen_pool = choose_pool(serv, pool, &state); - + chosen_pool = svc_pool_next(serv, pool, &state); node = svc_pool_map_get_node(chosen_pool->sp_id); - rqstp = svc_prepare_thread(serv, chosen_pool, node); - if (IS_ERR(rqstp)) - return PTR_ERR(rqstp); + rqstp = svc_prepare_thread(serv, chosen_pool, node); + if (!rqstp) + return -ENOMEM; task = kthread_create_on_node(serv->sv_threadfn, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { @@ -769,47 +825,60 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) svc_sock_update_bufs(serv); wake_up_process(task); + + wait_var_event(&rqstp->rq_err, rqstp->rq_err != -EAGAIN); + err = rqstp->rq_err; + if (err) { + svc_exit_thread(rqstp); + return err; + } } while (nrservs > 0); return 0; } -/* - * Create or destroy enough new threads to make the number - * of threads the given number. If `pool' is non-NULL, applies - * only to threads in that pool, otherwise round-robins between - * all pools. Caller must ensure that mutual exclusion between this and - * server startup or shutdown. - */ - -/* destroy old threads */ static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; + struct svc_pool *victim; - /* destroy old threads */ do { - task = choose_victim(serv, pool, &state); - if (task == NULL) + victim = svc_pool_victim(serv, pool, &state); + if (!victim) break; - kthread_stop(task); + svc_pool_wake_idle_thread(victim); + wait_on_bit(&victim->sp_flags, SP_VICTIM_REMAINS, + TASK_IDLE); nrservs++; } while (nrservs < 0); return 0; } +/** + * svc_set_num_threads - adjust number of threads per RPC service + * @serv: RPC service to adjust + * @pool: Specific pool from which to choose threads, or NULL + * @nrservs: New number of threads for @serv (0 or less means kill all threads) + * + * Create or destroy threads to make the number of threads for @serv the + * given number. If @pool is non-NULL, change only threads in that pool; + * otherwise, round-robin between all pools for @serv. @serv's + * sv_nrthreads is adjusted for each thread created or destroyed. + * + * Caller must ensure mutual exclusion between this and server startup or + * shutdown. + * + * Returns zero on success or a negative errno if an error occurred while + * starting a thread. + */ int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - if (pool == NULL) { + if (!pool) nrservs -= serv->sv_nrthreads; - } else { - spin_lock_bh(&pool->sp_lock); + else nrservs -= pool->sp_nrthreads; - spin_unlock_bh(&pool->sp_lock); - } if (nrservs > 0) return svc_start_kthreads(serv, pool, nrservs); @@ -826,57 +895,80 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); * * When replacing a page in rq_pages, batch the release of the * replaced pages to avoid hammering the page allocator. + * + * Return values: + * %true: page replaced + * %false: array bounds checking failed */ -void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) +bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) { + struct page **begin = rqstp->rq_pages; + struct page **end = &rqstp->rq_pages[rqstp->rq_maxpages]; + + if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) { + trace_svc_replace_page_err(rqstp); + return false; + } + if (*rqstp->rq_next_page) { - if (!pagevec_space(&rqstp->rq_pvec)) - __pagevec_release(&rqstp->rq_pvec); - pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page); + if (!folio_batch_add(&rqstp->rq_fbatch, + page_folio(*rqstp->rq_next_page))) + __folio_batch_release(&rqstp->rq_fbatch); } get_page(page); *(rqstp->rq_next_page++) = page; + return true; } EXPORT_SYMBOL_GPL(svc_rqst_replace_page); -/* - * Called from a server thread as it's exiting. Caller must hold the "service - * mutex" for the service. +/** + * svc_rqst_release_pages - Release Reply buffer pages + * @rqstp: RPC transaction context + * + * Release response pages that might still be in flight after + * svc_send, and any spliced filesystem-owned pages. */ -void -svc_rqst_free(struct svc_rqst *rqstp) +void svc_rqst_release_pages(struct svc_rqst *rqstp) { - svc_release_buffer(rqstp); - if (rqstp->rq_scratch_page) - put_page(rqstp->rq_scratch_page); - kfree(rqstp->rq_resp); - kfree(rqstp->rq_argp); - kfree(rqstp->rq_auth_data); - kfree_rcu(rqstp, rq_rcu_head); + int i, count = rqstp->rq_next_page - rqstp->rq_respages; + + if (count) { + release_pages(rqstp->rq_respages, count); + for (i = 0; i < count; i++) + rqstp->rq_respages[i] = NULL; + } } -EXPORT_SYMBOL_GPL(svc_rqst_free); +/** + * svc_exit_thread - finalise the termination of a sunrpc server thread + * @rqstp: the svc_rqst which represents the thread. + * + * When a thread started with svc_new_thread() exits it must call + * svc_exit_thread() as its last act. This must be done with the + * service mutex held. Normally this is held by a DIFFERENT thread, the + * one that is calling svc_set_num_threads() and which will wait for + * SP_VICTIM_REMAINS to be cleared before dropping the mutex. If the + * thread exits for any reason other than svc_thread_should_stop() + * returning %true (which indicated that svc_set_num_threads() is + * waiting for it to exit), then it must take the service mutex itself, + * which can only safely be done using mutex_try_lock(). + */ void svc_exit_thread(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; struct svc_pool *pool = rqstp->rq_pool; - spin_lock_bh(&pool->sp_lock); - pool->sp_nrthreads--; - if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags)) - list_del_rcu(&rqstp->rq_all); - spin_unlock_bh(&pool->sp_lock); + list_del_rcu(&rqstp->rq_all); - spin_lock_bh(&serv->sv_lock); + pool->sp_nrthreads -= 1; serv->sv_nrthreads -= 1; - spin_unlock_bh(&serv->sv_lock); svc_sock_update_bufs(serv); svc_rqst_free(rqstp); - svc_put(serv); + clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags); } EXPORT_SYMBOL_GPL(svc_exit_thread); @@ -1002,10 +1094,11 @@ static int __svc_register(struct net *net, const char *progname, #endif } - trace_svc_register(progname, version, protocol, port, family, error); + trace_svc_register(progname, version, family, protocol, port, error); return error; } +static int svc_rpcbind_set_version(struct net *net, const struct svc_program *progp, u32 version, int family, @@ -1016,7 +1109,6 @@ int svc_rpcbind_set_version(struct net *net, version, family, proto, port); } -EXPORT_SYMBOL_GPL(svc_rpcbind_set_version); int svc_generic_rpcbind_set(struct net *net, const struct svc_program *progp, @@ -1064,15 +1156,16 @@ int svc_register(const struct svc_serv *serv, struct net *net, const int family, const unsigned short proto, const unsigned short port) { - struct svc_program *progp; - unsigned int i; + unsigned int p, i; int error = 0; WARN_ON_ONCE(proto == 0 && port == 0); if (proto == 0 && port == 0) return -EINVAL; - for (progp = serv->sv_program; progp; progp = progp->pg_next) { + for (p = 0; p < serv->sv_nprogs; p++) { + struct svc_program *progp = &serv->sv_programs[p]; + for (i = 0; i < progp->pg_nvers; i++) { error = progp->pg_rpcbind_set(net, progp, i, @@ -1123,13 +1216,15 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi */ static void svc_unregister(const struct svc_serv *serv, struct net *net) { - struct svc_program *progp; + struct sighand_struct *sighand; unsigned long flags; - unsigned int i; + unsigned int p, i; clear_thread_flag(TIF_SIGPENDING); - for (progp = serv->sv_program; progp; progp = progp->pg_next) { + for (p = 0; p < serv->sv_nprogs; p++) { + struct svc_program *progp = &serv->sv_programs[p]; + for (i = 0; i < progp->pg_nvers; i++) { if (progp->pg_vers[i] == NULL) continue; @@ -1139,9 +1234,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) } } - spin_lock_irqsave(¤t->sighand->siglock, flags); + rcu_read_lock(); + sighand = rcu_dereference(current->sighand); + spin_lock_irqsave(&sighand->siglock, flags); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); + spin_unlock_irqrestore(&sighand->siglock, flags); + rcu_read_unlock(); } /* @@ -1200,15 +1298,13 @@ svc_generic_init_request(struct svc_rqst *rqstp, if (rqstp->rq_proc >= versp->vs_nproc) goto err_bad_proc; rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc]; - if (!procp) - goto err_bad_proc; /* Initialize storage for argp and resp */ memset(rqstp->rq_argp, 0, procp->pc_argzero); memset(rqstp->rq_resp, 0, procp->pc_ressize); /* Bump per-procedure stats counter */ - versp->vs_count[rqstp->rq_proc]++; + this_cpu_inc(versp->vs_count[rqstp->rq_proc]); ret->dispatch = versp->vs_dispatch; return rpc_success; @@ -1225,51 +1321,45 @@ EXPORT_SYMBOL_GPL(svc_generic_init_request); * Common routine for processing the RPC request. */ static int -svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) +svc_process_common(struct svc_rqst *rqstp) { - struct svc_program *progp; + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct svc_program *progp = NULL; const struct svc_procedure *procp = NULL; struct svc_serv *serv = rqstp->rq_server; struct svc_process_info process; - __be32 *statp; - u32 prog, vers; - __be32 rpc_stat; - int auth_res, rc; - __be32 *reply_statp; + enum svc_auth_status auth_res; + unsigned int aoffset; + int pr, rc; + __be32 *p; - rpc_stat = rpc_success; + /* Reset the accept_stat for the RPC */ + rqstp->rq_accept_statp = NULL; - if (argv->iov_len < 6*4) - goto err_short_len; - - /* Will be turned off by GSS integrity and privacy services */ - set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); clear_bit(RQ_DROPME, &rqstp->rq_flags); - svc_putu32(resv, rqstp->rq_xid); - - vers = svc_getnl(argv); + /* Construct the first words of the reply: */ + svcxdr_init_encode(rqstp); + xdr_stream_encode_be32(xdr, rqstp->rq_xid); + xdr_stream_encode_be32(xdr, rpc_reply); - /* First words of reply: */ - svc_putnl(resv, 1); /* REPLY */ - - if (vers != 2) /* RPC version number */ + p = xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 4); + if (unlikely(!p)) + goto err_short_len; + if (*p++ != cpu_to_be32(RPC_VERSION)) goto err_bad_rpc; - /* Save position in case we later decide to reject: */ - reply_statp = resv->iov_base + resv->iov_len; + xdr_stream_encode_be32(xdr, rpc_msg_accepted); - svc_putnl(resv, 0); /* ACCEPT */ + rqstp->rq_prog = be32_to_cpup(p++); + rqstp->rq_vers = be32_to_cpup(p++); + rqstp->rq_proc = be32_to_cpup(p); - rqstp->rq_prog = prog = svc_getnl(argv); /* program number */ - rqstp->rq_vers = svc_getnl(argv); /* version number */ - rqstp->rq_proc = svc_getnl(argv); /* procedure number */ - - for (progp = serv->sv_program; progp; progp = progp->pg_next) - if (prog == progp->pg_prog) - break; + for (pr = 0; pr < serv->sv_nprogs; pr++) + if (rqstp->rq_prog == serv->sv_programs[pr].pg_prog) + progp = &serv->sv_programs[pr]; /* * Decode auth data, and add verifier to reply buffer. @@ -1285,10 +1375,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) case SVC_OK: break; case SVC_GARBAGE: - goto err_garbage; - case SVC_SYSERR: - rpc_stat = rpc_system_err; - goto err_bad; + rqstp->rq_auth_stat = rpc_autherr_badcred; + goto err_bad_auth; case SVC_DENIED: goto err_bad_auth; case SVC_CLOSE: @@ -1297,13 +1385,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto dropit; case SVC_COMPLETE: goto sendit; + default: + pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res); + rqstp->rq_auth_stat = rpc_autherr_failed; + goto err_bad_auth; } if (progp == NULL) goto err_bad_prog; - rpc_stat = progp->pg_init_request(rqstp, progp, &process); - switch (rpc_stat) { + switch (progp->pg_init_request(rqstp, progp, &process)) { case rpc_success: break; case rpc_prog_unavail: @@ -1320,12 +1411,11 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_bad_proc; /* Syntactic check complete */ - serv->sv_stats->rpccnt++; + if (serv->sv_stats) + serv->sv_stats->rpccnt++; trace_svc_process(rqstp, progp->pg_name); - /* Build the reply header. */ - statp = resv->iov_base +resv->iov_len; - svc_putnl(resv, RPC_SUCCESS); + aoffset = xdr_stream_pos(xdr); /* un-reserve some of the out-queue now that we have a * better idea of reply size @@ -1334,17 +1424,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); /* Call the function that processes the request. */ - rc = process.dispatch(rqstp, statp); - if (procp->pc_release) - procp->pc_release(rqstp); + rc = process.dispatch(rqstp); + xdr_finish_decode(xdr); + if (!rc) goto dropit; if (rqstp->rq_auth_stat != rpc_auth_ok) goto err_bad_auth; - /* Check RPC status result */ - if (*statp != rpc_success) - resv->iov_len = ((void*)statp) - resv->iov_base + 4; + if (*rqstp->rq_accept_statp != rpc_success) + xdr_truncate_encode(xdr, aoffset); if (procp->pc_encode == NULL) goto dropit; @@ -1368,71 +1457,89 @@ close_xprt: return 0; err_short_len: - svc_printk(rqstp, "short len %zd, dropping request\n", - argv->iov_len); + svc_printk(rqstp, "short len %u, dropping request\n", + rqstp->rq_arg.len); goto close_xprt; err_bad_rpc: - serv->sv_stats->rpcbadfmt++; - svc_putnl(resv, 1); /* REJECT */ - svc_putnl(resv, 0); /* RPC_MISMATCH */ - svc_putnl(resv, 2); /* Only RPCv2 supported */ - svc_putnl(resv, 2); - goto sendit; + if (serv->sv_stats) + serv->sv_stats->rpcbadfmt++; + xdr_stream_encode_u32(xdr, RPC_MSG_DENIED); + xdr_stream_encode_u32(xdr, RPC_MISMATCH); + /* Only RPCv2 supported */ + xdr_stream_encode_u32(xdr, RPC_VERSION); + xdr_stream_encode_u32(xdr, RPC_VERSION); + return 1; /* don't wrap */ err_bad_auth: dprintk("svc: authentication failed (%d)\n", be32_to_cpu(rqstp->rq_auth_stat)); - serv->sv_stats->rpcbadauth++; - /* Restore write pointer to location of accept status: */ - xdr_ressize_check(rqstp, reply_statp); - svc_putnl(resv, 1); /* REJECT */ - svc_putnl(resv, 1); /* AUTH_ERROR */ - svc_putu32(resv, rqstp->rq_auth_stat); /* status */ + if (serv->sv_stats) + serv->sv_stats->rpcbadauth++; + /* Restore write pointer to location of reply status: */ + xdr_truncate_encode(xdr, XDR_UNIT * 2); + xdr_stream_encode_u32(xdr, RPC_MSG_DENIED); + xdr_stream_encode_u32(xdr, RPC_AUTH_ERROR); + xdr_stream_encode_be32(xdr, rqstp->rq_auth_stat); goto sendit; err_bad_prog: - dprintk("svc: unknown program %d\n", prog); - serv->sv_stats->rpcbadfmt++; - svc_putnl(resv, RPC_PROG_UNAVAIL); + dprintk("svc: unknown program %d\n", rqstp->rq_prog); + if (serv->sv_stats) + serv->sv_stats->rpcbadfmt++; + *rqstp->rq_accept_statp = rpc_prog_unavail; goto sendit; err_bad_vers: svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n", rqstp->rq_vers, rqstp->rq_prog, progp->pg_name); - serv->sv_stats->rpcbadfmt++; - svc_putnl(resv, RPC_PROG_MISMATCH); - svc_putnl(resv, process.mismatch.lovers); - svc_putnl(resv, process.mismatch.hivers); + if (serv->sv_stats) + serv->sv_stats->rpcbadfmt++; + *rqstp->rq_accept_statp = rpc_prog_mismatch; + + /* + * svc_authenticate() has already added the verifier and + * advanced the stream just past rq_accept_statp. + */ + xdr_stream_encode_u32(xdr, process.mismatch.lovers); + xdr_stream_encode_u32(xdr, process.mismatch.hivers); goto sendit; err_bad_proc: svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc); - serv->sv_stats->rpcbadfmt++; - svc_putnl(resv, RPC_PROC_UNAVAIL); + if (serv->sv_stats) + serv->sv_stats->rpcbadfmt++; + *rqstp->rq_accept_statp = rpc_proc_unavail; goto sendit; +} + +/* + * Drop request + */ +static void svc_drop(struct svc_rqst *rqstp) +{ + trace_svc_drop(rqstp); +} -err_garbage: - svc_printk(rqstp, "failed to decode args\n"); +static void svc_release_rqst(struct svc_rqst *rqstp) +{ + const struct svc_procedure *procp = rqstp->rq_procinfo; - rpc_stat = rpc_garbage_args; -err_bad: - serv->sv_stats->rpcbadfmt++; - svc_putnl(resv, ntohl(rpc_stat)); - goto sendit; + if (procp && procp->pc_release) + procp->pc_release(rqstp); } -/* - * Process the RPC request. +/** + * svc_process - Execute one RPC transaction + * @rqstp: RPC transaction context + * */ -int -svc_process(struct svc_rqst *rqstp) +void svc_process(struct svc_rqst *rqstp) { - struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; - __be32 dir; + __be32 *p; #if IS_ENABLED(CONFIG_FAIL_SUNRPC) if (!fail_sunrpc.ignore_server_disconnect && @@ -1455,44 +1562,49 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; - dir = svc_getu32(argv); - if (dir != rpc_call) + svcxdr_init_decode(rqstp); + p = xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2); + if (unlikely(!p)) + goto out_drop; + rqstp->rq_xid = *p++; + if (unlikely(*p != rpc_call)) goto out_baddir; - if (!svc_process_common(rqstp, argv, resv)) + + if (!svc_process_common(rqstp)) { + svc_release_rqst(rqstp); goto out_drop; - return svc_send(rqstp); + } + svc_send(rqstp); + svc_release_rqst(rqstp); + return; out_baddir: svc_printk(rqstp, "bad direction 0x%08x, dropping request\n", - be32_to_cpu(dir)); - rqstp->rq_server->sv_stats->rpcbadfmt++; + be32_to_cpu(*p)); + if (rqstp->rq_server->sv_stats) + rqstp->rq_server->sv_stats->rpcbadfmt++; out_drop: svc_drop(rqstp); - return 0; } -EXPORT_SYMBOL_GPL(svc_process); #if defined(CONFIG_SUNRPC_BACKCHANNEL) -/* - * Process a backchannel RPC request that arrived over an existing - * outbound connection +/** + * svc_process_bc - process a reverse-direction RPC request + * @req: RPC request to be used for client-side processing + * @rqstp: server-side execution context + * */ -int -bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, - struct svc_rqst *rqstp) +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) { - struct kvec *argv = &rqstp->rq_arg.head[0]; - struct kvec *resv = &rqstp->rq_res.head[0]; + struct rpc_timeout timeout = { + .to_increment = 0, + }; struct rpc_task *task; int proc_error; - int error; - - dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; - rqstp->rq_server = serv; rqstp->rq_bc_net = req->rq_xprt->xprt_net; rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); @@ -1513,43 +1625,46 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len; - /* reset result send buffer "put" position */ - resv->iov_len = 0; + /* Reset the response buffer */ + rqstp->rq_res.head[0].iov_len = 0; /* - * Skip the next two words because they've already been - * processed in the transport + * Skip the XID and calldir fields because they've already + * been processed by the caller. */ - svc_getu32(argv); /* XID */ - svc_getnl(argv); /* CALLDIR */ + svcxdr_init_decode(rqstp); + if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2)) + return; /* Parse and execute the bc call */ - proc_error = svc_process_common(rqstp, argv, resv); + proc_error = svc_process_common(rqstp); atomic_dec(&req->rq_xprt->bc_slot_count); if (!proc_error) { /* Processing error: drop the request */ xprt_free_bc_request(req); - error = -EINVAL; - goto out; + svc_release_rqst(rqstp); + return; } /* Finally, send the reply synchronously */ - memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); - task = rpc_run_bc_task(req); - if (IS_ERR(task)) { - error = PTR_ERR(task); - goto out; + if (rqstp->bc_to_initval > 0) { + timeout.to_initval = rqstp->bc_to_initval; + timeout.to_retries = rqstp->bc_to_retries; + } else { + timeout.to_initval = req->rq_xprt->timeout->to_initval; + timeout.to_retries = req->rq_xprt->timeout->to_retries; } + timeout.to_maxval = timeout.to_initval; + memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); + task = rpc_run_bc_task(req, &timeout); + svc_release_rqst(rqstp); + + if (IS_ERR(task)) + return; WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); - error = task->tk_status; rpc_put_task(task); - -out: - dprintk("svc: %s(), error=%d\n", __func__, error); - return error; } -EXPORT_SYMBOL_GPL(bc_svc_process); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ /** @@ -1602,46 +1717,6 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, EXPORT_SYMBOL_GPL(svc_encode_result_payload); /** - * svc_fill_write_vector - Construct data argument for VFS write call - * @rqstp: svc_rqst to operate on - * @payload: xdr_buf containing only the write data payload - * - * Fills in rqstp::rq_vec, and returns the number of elements. - */ -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct xdr_buf *payload) -{ - struct page **pages = payload->pages; - struct kvec *first = payload->head; - struct kvec *vec = rqstp->rq_vec; - size_t total = payload->len; - unsigned int i; - - /* Some types of transport can present the write payload - * entirely in rq_arg.pages. In this case, @first is empty. - */ - i = 0; - if (first->iov_len) { - vec[i].iov_base = first->iov_base; - vec[i].iov_len = min_t(size_t, total, first->iov_len); - total -= vec[i].iov_len; - ++i; - } - - while (total) { - vec[i].iov_base = page_address(*pages); - vec[i].iov_len = min_t(size_t, total, PAGE_SIZE); - total -= vec[i].iov_len; - ++i; - ++pages; - } - - WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec)); - return i; -} -EXPORT_SYMBOL_GPL(svc_fill_write_vector); - -/** * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call * @rqstp: svc_rqst to operate on * @first: buffer containing first section of pathname |
