Diffstat (limited to 'kernel/padata.c')
-rw-r--r-- | kernel/padata.c | 139
1 files changed, 92 insertions, 47 deletions
diff --git a/kernel/padata.c b/kernel/padata.c
index d4d3ba6e1728..418987056340 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -9,19 +9,6 @@
  *
  * Copyright (c) 2020 Oracle and/or its affiliates.
  * Author: Daniel Jordan <daniel.m.jordan@oracle.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #include <linux/completion.h>
@@ -60,6 +47,22 @@ struct padata_mt_job_state {
 static void padata_free_pd(struct parallel_data *pd);
 static void __init padata_mt_helper(struct work_struct *work);
 
+static inline void padata_get_pd(struct parallel_data *pd)
+{
+        refcount_inc(&pd->refcnt);
+}
+
+static inline void padata_put_pd_cnt(struct parallel_data *pd, int cnt)
+{
+        if (refcount_sub_and_test(cnt, &pd->refcnt))
+                padata_free_pd(pd);
+}
+
+static inline void padata_put_pd(struct parallel_data *pd)
+{
+        padata_put_pd_cnt(pd, 1);
+}
+
 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 {
         int cpu, target_cpu;
@@ -96,8 +99,16 @@ static struct padata_work *padata_work_alloc(void)
         return pw;
 }
 
-static void padata_work_init(struct padata_work *pw, work_func_t work_fn,
-                             void *data, int flags)
+/*
+ * This function is marked __ref because this function may be optimized in such
+ * a way that it directly refers to work_fn's address, which causes modpost to
+ * complain when work_fn is marked __init. This scenario was observed with clang
+ * LTO, where padata_work_init() was optimized to refer directly to
+ * padata_mt_helper() because the calls to padata_work_init() with other work_fn
+ * values were eliminated or inlined.
+ */
+static void __ref padata_work_init(struct padata_work *pw, work_func_t work_fn,
+                                   void *data, int flags)
 {
         if (flags & PADATA_WORK_ONSTACK)
                 INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
@@ -111,7 +122,7 @@ static int __init padata_work_alloc_mt(int nworks, void *data,
 {
         int i;
 
-        spin_lock(&padata_works_lock);
+        spin_lock_bh(&padata_works_lock);
         /* Start at 1 because the current task participates in the job. */
         for (i = 1; i < nworks; ++i) {
                 struct padata_work *pw = padata_work_alloc();
@@ -121,7 +132,7 @@ static int __init padata_work_alloc_mt(int nworks, void *data,
                 padata_work_init(pw, padata_mt_helper, data, 0);
                 list_add(&pw->pw_list, head);
         }
-        spin_unlock(&padata_works_lock);
+        spin_unlock_bh(&padata_works_lock);
 
         return i;
 }
@@ -139,12 +150,12 @@ static void __init padata_works_free(struct list_head *works)
         if (list_empty(works))
                 return;
 
-        spin_lock(&padata_works_lock);
+        spin_lock_bh(&padata_works_lock);
         list_for_each_entry_safe(cur, next, works, pw_list) {
                 list_del(&cur->pw_list);
                 padata_work_free(cur);
         }
-        spin_unlock(&padata_works_lock);
+        spin_unlock_bh(&padata_works_lock);
 }
 
 static void padata_parallel_worker(struct work_struct *parallel_work)
@@ -194,7 +205,7 @@ int padata_do_parallel(struct padata_shell *ps,
                 goto out;
 
         if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
-                if (!cpumask_weight(pd->cpumask.cbcpu))
+                if (cpumask_empty(pd->cpumask.cbcpu))
                         goto out;
 
                 /* Select an alternate fallback CPU and notify the caller. */
@@ -207,11 +218,11 @@ int padata_do_parallel(struct padata_shell *ps,
                 *cb_cpu = cpu;
         }
 
-        err =  -EBUSY;
+        err = -EBUSY;
         if ((pinst->flags & PADATA_RESET))
                 goto out;
 
-        atomic_inc(&pd->refcnt);
+        padata_get_pd(pd);
         padata->pd = pd;
         padata->cb_cpu = *cb_cpu;
 
@@ -220,14 +231,16 @@ int padata_do_parallel(struct padata_shell *ps,
         pw = padata_work_alloc();
         spin_unlock(&padata_works_lock);
+        if (!pw) {
+                /* Maximum works limit exceeded, run in the current task. */
+                padata->parallel(padata);
+        }
+
         rcu_read_unlock_bh();
 
         if (pw) {
                 padata_work_init(pw, padata_parallel_worker, padata, 0);
                 queue_work(pinst->parallel_wq, &pw->pw_work);
-        } else {
-                /* Maximum works limit exceeded, run in the current task. */
-                padata->parallel(padata);
         }
 
         return 0;
@@ -339,8 +352,14 @@ static void padata_reorder(struct parallel_data *pd)
         smp_mb();
 
         reorder = per_cpu_ptr(pd->reorder_list, pd->cpu);
-        if (!list_empty(&reorder->list) && padata_find_next(pd, false))
+        if (!list_empty(&reorder->list) && padata_find_next(pd, false)) {
+                /*
+                 * Other context(eg. the padata_serial_worker) can finish the request.
+                 * To avoid UAF issue, add pd ref here, and put pd ref after reorder_work finish.
+                 */
+                padata_get_pd(pd);
                 queue_work(pinst->serial_wq, &pd->reorder_work);
+        }
 }
 
 static void invoke_padata_reorder(struct work_struct *work)
@@ -351,6 +370,8 @@ static void invoke_padata_reorder(struct work_struct *work)
         pd = container_of(work, struct parallel_data, reorder_work);
         padata_reorder(pd);
         local_bh_enable();
+        /* Pairs with putting the reorder_work in the serial_wq */
+        padata_put_pd(pd);
 }
 
 static void padata_serial_worker(struct work_struct *serial_work)
@@ -383,8 +404,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
         }
         local_bh_enable();
 
-        if (atomic_sub_and_test(cnt, &pd->refcnt))
-                padata_free_pd(pd);
+        padata_put_pd_cnt(pd, cnt);
 }
 
 /**
@@ -401,13 +421,17 @@ void padata_do_serial(struct padata_priv *padata)
         int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
         struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
         struct padata_priv *cur;
+        struct list_head *pos;
 
         spin_lock(&reorder->lock);
         /* Sort in ascending order of sequence number. */
-        list_for_each_entry_reverse(cur, &reorder->list, list)
-                if (cur->seq_nr < padata->seq_nr)
+        list_for_each_prev(pos, &reorder->list) {
+                cur = list_entry(pos, struct padata_priv, list);
+                /* Compare by difference to consider integer wrap around */
+                if ((signed int)(cur->seq_nr - padata->seq_nr) < 0)
                         break;
-        list_add(&padata->list, &cur->list);
+        }
+        list_add(&padata->list, pos);
         spin_unlock(&reorder->lock);
 
         /*
@@ -485,13 +509,14 @@ void __init padata_do_multithreaded(struct padata_mt_job *job)
         struct padata_work my_work, *pw;
         struct padata_mt_job_state ps;
         LIST_HEAD(works);
-        int nworks;
+        int nworks, nid;
+        static atomic_t last_used_nid __initdata;
 
         if (job->size == 0)
                 return;
 
         /* Ensure at least one thread when size < min_chunk. */
-        nworks = max(job->size / job->min_chunk, 1ul);
+        nworks = max(job->size / max(job->min_chunk, job->align), 1ul);
         nworks = min(nworks, job->max_threads);
 
         if (nworks == 1) {
@@ -511,13 +536,25 @@ void __init padata_do_multithreaded(struct padata_mt_job *job)
          * thread function. Load balance large jobs between threads by
          * increasing the number of chunks, guarantee at least the minimum
          * chunk size from the caller, and honor the caller's alignment.
+         * Ensure chunk_size is at least 1 to prevent divide-by-0
+         * panic in padata_mt_helper().
          */
         ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
         ps.chunk_size = max(ps.chunk_size, job->min_chunk);
+        ps.chunk_size = max(ps.chunk_size, 1ul);
         ps.chunk_size = roundup(ps.chunk_size, job->align);
 
         list_for_each_entry(pw, &works, pw_list)
-                queue_work(system_unbound_wq, &pw->pw_work);
+                if (job->numa_aware) {
+                        int old_node = atomic_read(&last_used_nid);
+
+                        do {
+                                nid = next_node_in(old_node, node_states[N_CPU]);
+                        } while (!atomic_try_cmpxchg(&last_used_nid, &old_node, nid));
+                        queue_work_node(nid, system_unbound_wq, &pw->pw_work);
+                } else {
+                        queue_work(system_unbound_wq, &pw->pw_work);
+                }
 
         /* Use the current thread, which saves starting a workqueue worker. */
         padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
@@ -593,7 +630,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
         padata_init_reorder_list(pd);
         padata_init_squeues(pd);
         pd->seq_nr = -1;
-        atomic_set(&pd->refcnt, 1);
+        refcount_set(&pd->refcnt, 1);
         spin_lock_init(&pd->lock);
         pd->cpu = cpumask_first(pd->cpumask.pcpu);
         INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -667,8 +704,7 @@ static int padata_replace(struct padata_instance *pinst)
         synchronize_rcu();
 
         list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
-                if (atomic_dec_and_test(&ps->opd->refcnt))
-                        padata_free_pd(ps->opd);
+                padata_put_pd(ps->opd);
 
         pinst->flags &= ~PADATA_RESET;
 
@@ -733,7 +769,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
         struct cpumask *serial_mask, *parallel_mask;
         int err = -EINVAL;
 
-        get_online_cpus();
+        cpus_read_lock();
         mutex_lock(&pinst->lock);
 
         switch (cpumask_type) {
@@ -753,7 +789,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 
 out:
         mutex_unlock(&pinst->lock);
-        put_online_cpus();
+        cpus_read_unlock();
 
         return err;
 }
@@ -956,7 +992,7 @@ static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
 
         pinst = kobj2pinst(kobj);
         pentry = attr2pentry(attr);
-        if (pentry->show)
+        if (pentry->store)
                 ret = pentry->store(pinst, attr, buf, count);
 
         return ret;
@@ -967,7 +1003,7 @@ static const struct sysfs_ops padata_sysfs_ops = {
         .store = padata_sysfs_store,
 };
 
-static struct kobj_type padata_attr_type = {
+static const struct kobj_type padata_attr_type = {
         .sysfs_ops = &padata_sysfs_ops,
         .default_groups = padata_default_groups,
         .release = padata_sysfs_release,
@@ -992,7 +1028,7 @@ struct padata_instance *padata_alloc(const char *name)
         if (!pinst->parallel_wq)
                 goto err_free_inst;
 
-        get_online_cpus();
+        cpus_read_lock();
 
         pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
                                            WQ_CPU_INTENSIVE, 1, name);
@@ -1026,7 +1062,7 @@ struct padata_instance *padata_alloc(const char *name)
                                                     &pinst->cpu_dead_node);
 #endif
 
-        put_online_cpus();
+        cpus_read_unlock();
 
         return pinst;
 
@@ -1036,7 +1072,7 @@ err_free_masks:
 err_free_serial_wq:
         destroy_workqueue(pinst->serial_wq);
 err_put_cpus:
-        put_online_cpus();
+        cpus_read_unlock();
         destroy_workqueue(pinst->parallel_wq);
 err_free_inst:
         kfree(pinst);
@@ -1074,9 +1110,9 @@ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
 
         ps->pinst = pinst;
 
-        get_online_cpus();
+        cpus_read_lock();
         pd = padata_alloc_pd(ps);
-        put_online_cpus();
+        cpus_read_unlock();
 
         if (!pd)
                 goto out_free_ps;
@@ -1102,12 +1138,21 @@ EXPORT_SYMBOL(padata_alloc_shell);
  */
 void padata_free_shell(struct padata_shell *ps)
 {
+        struct parallel_data *pd;
+
         if (!ps)
                 return;
 
+        /*
+         * Wait for all _do_serial calls to finish to avoid touching
+         * freed pd's and ps's.
+         */
+        synchronize_rcu();
+
         mutex_lock(&ps->pinst->lock);
         list_del(&ps->list);
-        padata_free_pd(rcu_dereference_protected(ps->pd, 1));
+        pd = rcu_dereference_protected(ps->pd, 1);
+        padata_put_pd(pd);
         mutex_unlock(&ps->pinst->lock);
 
         kfree(ps);
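The padata_get_pd()/padata_put_pd()/padata_put_pd_cnt() helpers added above centralize the parallel_data reference counting that was previously open-coded with atomic_inc() and atomic_sub_and_test(). A rough userspace sketch of the same get/put pattern, using C11 atomics in place of the kernel's refcount_t (the struct and function names here are illustrative, not kernel APIs):

    #include <stdatomic.h>
    #include <stdlib.h>

    /* Illustrative stand-in for struct parallel_data. */
    struct pdata {
            atomic_int refcnt;
            /* ... per-instance state ... */
    };

    static void pdata_free(struct pdata *pd)
    {
            free(pd);
    }

    /* Mirrors padata_get_pd(): take one reference. */
    static void pdata_get(struct pdata *pd)
    {
            atomic_fetch_add_explicit(&pd->refcnt, 1, memory_order_relaxed);
    }

    /* Mirrors padata_put_pd_cnt(): drop 'cnt' references, free on the last one. */
    static void pdata_put_cnt(struct pdata *pd, int cnt)
    {
            /* acq_rel so the freeing thread sees all prior writes to *pd. */
            if (atomic_fetch_sub_explicit(&pd->refcnt, cnt, memory_order_acq_rel) == cnt)
                    pdata_free(pd);
    }

    /* Mirrors padata_put_pd(): drop a single reference. */
    static void pdata_put(struct pdata *pd)
    {
            pdata_put_cnt(pd, 1);
    }

    int main(void)
    {
            struct pdata *pd = calloc(1, sizeof(*pd));

            atomic_init(&pd->refcnt, 1);    /* creation reference, as padata_alloc_pd() sets */
            pdata_get(pd);                  /* e.g. a submitted request */
            pdata_put(pd);                  /* request completed */
            pdata_put_cnt(pd, 1);           /* final put frees the object */
            return 0;
    }

As in the diff, a worker that completes many requests can batch its puts with the _cnt variant instead of dropping one reference per request, which is what padata_serial_worker() does with padata_put_pd_cnt(pd, cnt).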
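padata_do_serial() now orders requests by the signed difference of their sequence numbers instead of a plain less-than, so the sort stays correct after the unsigned seq_nr counter wraps. A minimal, self-contained illustration of that comparison (the values are invented; like the kernel code, the cast to int assumes two's-complement wraparound):

    #include <assert.h>
    #include <stdio.h>

    /* true if sequence number 'a' was issued before 'b', even across a wrap */
    static int seq_before(unsigned int a, unsigned int b)
    {
            return (int)(a - b) < 0;
    }

    int main(void)
    {
            /* Ordinary case: 5 precedes 10. */
            assert(seq_before(5, 10));

            /* Wrap around: 0xfffffffe precedes 1; a plain '<' would say the opposite. */
            assert(seq_before(0xfffffffeu, 1));
            assert(!seq_before(1, 0xfffffffeu));

            printf("wrap-safe ordering holds\n");
            return 0;
    }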
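padata_do_multithreaded() now clamps chunk_size to at least 1 before rounding it up to the caller's alignment, matching the added comment about preventing a divide-by-0 in padata_mt_helper(). A quick plain-C check of the clamping order (the field names mirror the diff; the constants and values are invented):

    #include <stdio.h>

    #define LOAD_BALANCE_FACTOR 2UL   /* stand-in for the kernel's load_balance_factor */

    static unsigned long max_ul(unsigned long a, unsigned long b)
    {
            return a > b ? a : b;
    }

    static unsigned long roundup_ul(unsigned long x, unsigned long align)
    {
            return (x + align - 1) / align * align;
    }

    int main(void)
    {
            /* Invented tiny job: 1 unit of work, 1 helper, min_chunk 0, align 8. */
            unsigned long size = 1, nworks = 1, min_chunk = 0, align = 8;
            unsigned long chunk;

            chunk = size / (nworks * LOAD_BALANCE_FACTOR);  /* 0 */
            chunk = max_ul(chunk, min_chunk);               /* still 0 */
            chunk = max_ul(chunk, 1UL);                     /* the new lower bound */
            chunk = roundup_ul(chunk, align);

            printf("chunk_size = %lu\n", chunk);            /* 8, never 0 */
            return 0;
    }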
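When job->numa_aware is set, the queued works are spread round-robin across the nodes in node_states[N_CPU], with atomic_try_cmpxchg() retrying the update if another caller advanced last_used_nid first. A userspace sketch of that lock-free round-robin selection, with C11 atomic_compare_exchange_weak() standing in for atomic_try_cmpxchg() and a simple modulo standing in for next_node_in() (the node count is invented):

    #include <stdatomic.h>
    #include <stdio.h>

    #define NR_NODES 4   /* hypothetical number of nodes with CPUs */

    static atomic_int last_used_nid;

    static int next_round_robin_node(void)
    {
            int old_node = atomic_load(&last_used_nid);
            int nid;

            /*
             * On failure, atomic_compare_exchange_weak() reloads the current
             * value into old_node, so each retry starts from fresh data; this
             * matches the contract atomic_try_cmpxchg() provides in the kernel.
             */
            do {
                    nid = (old_node + 1) % NR_NODES;
            } while (!atomic_compare_exchange_weak(&last_used_nid, &old_node, nid));

            return nid;
    }

    int main(void)
    {
            for (int i = 0; i < 6; i++)
                    printf("queue work on node %d\n", next_round_robin_node());
            return 0;
    }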