diff options
Diffstat (limited to 'include/linux/workqueue.h')
| -rw-r--r-- | include/linux/workqueue.h | 661 |
1 files changed, 445 insertions, 216 deletions
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a0ed78ab54d7..dabc351cc127 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * workqueue.h --- work queue handling for Linux. */ @@ -5,19 +6,16 @@ #ifndef _LINUX_WORKQUEUE_H #define _LINUX_WORKQUEUE_H +#include <linux/alloc_tag.h> #include <linux/timer.h> #include <linux/linkage.h> #include <linux/bitops.h> #include <linux/lockdep.h> #include <linux/threads.h> #include <linux/atomic.h> -#include <linux/cpumask.h> - -struct workqueue_struct; - -struct work_struct; -typedef void (*work_func_t)(struct work_struct *work); -void delayed_work_timer_fn(unsigned long __data); +#include <linux/cpumask_types.h> +#include <linux/rcupdate.h> +#include <linux/workqueue_types.h> /* * The first word is the work queue pointer and the flags rolled into @@ -25,90 +23,93 @@ void delayed_work_timer_fn(unsigned long __data); */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) -enum { +enum work_bits { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ - WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */ - WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ + WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */ + WORK_STRUCT_PWQ_BIT, /* data points to pwq */ + WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ -#else - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */ #endif + WORK_STRUCT_FLAG_BITS, + /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_STRUCT_COLOR_BITS = 4, - WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, - WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, - WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, - WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, -#ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, -#else - WORK_STRUCT_STATIC = 0, -#endif - /* - * The last color is no color used for works which don't - * participate in workqueue flushing. + * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/ + * debugobjects turned off. This makes pwqs aligned to 256 bytes (512 + * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors. + * + * MSB + * [ pwq pointer ] [ flush color ] [ STRUCT flags ] + * 4 bits 4 or 5 bits */ - WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1, - WORK_NO_COLOR = WORK_NR_COLORS, - - /* special cpu IDs */ - WORK_CPU_UNBOUND = NR_CPUS, - WORK_CPU_END = NR_CPUS + 1, + WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, /* - * Reserve 7 bits off of pwq pointer w/ debugobjects turned off. - * This makes pwqs aligned to 256 bytes and allows 15 workqueue - * flush colors. + * data contains off-queue information when !WORK_STRUCT_PWQ. + * + * MSB + * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ] + * 16 bits 1 bit 4 or 5 bits */ - WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + - WORK_STRUCT_COLOR_BITS, + WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_BH_BIT = WORK_OFFQ_FLAG_SHIFT, + WORK_OFFQ_FLAG_END, + WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, - /* data contains off-queue information when !WORK_STRUCT_PWQ */ - WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, - - WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), + WORK_OFFQ_DISABLE_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_DISABLE_BITS = 16, /* - * When a work item is off queue, its high bits point to the last - * pool it was on. Cap at 31 bits and use the highest number to - * indicate that no pool is associated. + * When a work item is off queue, the high bits encode off-queue flags + * and the last pool it was on. Cap pool ID to 31 bits and use the + * highest number to indicate that no pool is associated. */ - WORK_OFFQ_FLAG_BITS = 1, - WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_DISABLE_SHIFT + WORK_OFFQ_DISABLE_BITS, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1, +}; + +enum work_flags { + WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT, + WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, + WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, +#ifdef CONFIG_DEBUG_OBJECTS_WORK + WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, +#else + WORK_STRUCT_STATIC = 0, +#endif +}; - /* convenience constants */ - WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, - WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT, +enum wq_misc_consts { + WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS), + + /* not bound to any CPU, prefer the local CPU */ + WORK_CPU_UNBOUND = NR_CPUS, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ - WORKER_DESC_LEN = 24, + WORKER_DESC_LEN = 32, }; -struct work_struct { - atomic_long_t data; - struct list_head entry; - work_func_t func; -#ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; -#endif -}; +/* Convenience constants - of type 'unsigned long', not 'enum'! */ +#define WORK_OFFQ_BH (1ul << WORK_OFFQ_BH_BIT) +#define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT) +#define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT) +#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) +#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) +#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) -#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL) +#define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ - ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC) + ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)) struct delayed_work { struct work_struct work; @@ -119,18 +120,93 @@ struct delayed_work { int cpu; }; -/* - * A struct for workqueue attributes. This can be used to change - * attributes of an unbound workqueue. +struct rcu_work { + struct work_struct work; + struct rcu_head rcu; + + /* target workqueue ->rcu uses to queue ->work */ + struct workqueue_struct *wq; +}; + +enum wq_affn_scope { + WQ_AFFN_DFL, /* use system default */ + WQ_AFFN_CPU, /* one pod per CPU */ + WQ_AFFN_SMT, /* one pod poer SMT */ + WQ_AFFN_CACHE, /* one pod per LLC */ + WQ_AFFN_NUMA, /* one pod per NUMA node */ + WQ_AFFN_SYSTEM, /* one pod across the whole system */ + + WQ_AFFN_NR_TYPES, +}; + +/** + * struct workqueue_attrs - A struct for workqueue attributes. * - * Unlike other fields, ->no_numa isn't a property of a worker_pool. It - * only modifies how apply_workqueue_attrs() select pools and thus doesn't - * participate in pool hash calculations or equality comparisons. + * This can be used to change attributes of an unbound workqueue. */ struct workqueue_attrs { - int nice; /* nice level */ - cpumask_var_t cpumask; /* allowed CPUs */ - bool no_numa; /* disable NUMA affinity */ + /** + * @nice: nice level + */ + int nice; + + /** + * @cpumask: allowed CPUs + * + * Work items in this workqueue are affine to these CPUs and not allowed + * to execute on other CPUs. A pool serving a workqueue must have the + * same @cpumask. + */ + cpumask_var_t cpumask; + + /** + * @__pod_cpumask: internal attribute used to create per-pod pools + * + * Internal use only. + * + * Per-pod unbound worker pools are used to improve locality. Always a + * subset of ->cpumask. A workqueue can be associated with multiple + * worker pools with disjoint @__pod_cpumask's. Whether the enforcement + * of a pool's @__pod_cpumask is strict depends on @affn_strict. + */ + cpumask_var_t __pod_cpumask; + + /** + * @affn_strict: affinity scope is strict + * + * If clear, workqueue will make a best-effort attempt at starting the + * worker inside @__pod_cpumask but the scheduler is free to migrate it + * outside. + * + * If set, workers are only allowed to run inside @__pod_cpumask. + */ + bool affn_strict; + + /* + * Below fields aren't properties of a worker_pool. They only modify how + * :c:func:`apply_workqueue_attrs` select pools and thus don't + * participate in pool hash calculations or equality comparisons. + * + * If @affn_strict is set, @cpumask isn't a property of a worker_pool + * either. + */ + + /** + * @affn_scope: unbound CPU affinity scope + * + * CPU pods are used to improve execution locality of unbound work + * items. There are multiple pod types, one for each wq_affn_scope, and + * every CPU in the system belongs to one pod in every pod type. CPUs + * that belong to the same pod share the worker pool. For example, + * selecting %WQ_AFFN_NUMA makes the workqueue use a separate worker + * pool for each NUMA node. + */ + enum wq_affn_scope affn_scope; + + /** + * @ordered: work items must be executed one by one in queueing order + */ + bool ordered; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) @@ -138,6 +214,11 @@ static inline struct delayed_work *to_delayed_work(struct work_struct *work) return container_of(work, struct delayed_work, work); } +static inline struct rcu_work *to_rcu_work(struct work_struct *work) +{ + return container_of(work, struct rcu_work, work); +} + struct execute_work { struct work_struct work; }; @@ -163,8 +244,7 @@ struct execute_work { #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ - .timer = __TIMER_INITIALIZER(delayed_work_timer_fn, \ - 0, (unsigned long)&(n), \ + .timer = __TIMER_INITIALIZER(delayed_work_timer_fn,\ (tflags) | TIMER_IRQSAFE), \ } @@ -177,20 +257,10 @@ struct execute_work { #define DECLARE_DEFERRABLE_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE) -/* - * initialize a work item's function pointer - */ -#define PREPARE_WORK(_work, _func) \ - do { \ - (_work)->func = (_func); \ - } while (0) - -#define PREPARE_DELAYED_WORK(_work, _func) \ - PREPARE_WORK(&(_work)->work, (_func)) - #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); +extern void destroy_delayed_work_on_stack(struct delayed_work *work); static inline unsigned int work_static(struct work_struct *work) { return *work_data_bits(work) & WORK_STRUCT_STATIC; @@ -198,6 +268,7 @@ static inline unsigned int work_static(struct work_struct *work) #else static inline void __init_work(struct work_struct *work, int onstack) { } static inline void destroy_work_on_stack(struct work_struct *work) { } +static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { } static inline unsigned int work_static(struct work_struct *work) { return 0; } #endif @@ -209,51 +280,54 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * to generate better code. */ #ifdef CONFIG_LOCKDEP -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ - static struct lock_class_key __key; \ - \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ - lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \ + lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \ INIT_LIST_HEAD(&(_work)->entry); \ - PREPARE_WORK((_work), (_func)); \ + (_work)->func = (_func); \ } while (0) #else -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ - PREPARE_WORK((_work), (_func)); \ + (_work)->func = (_func); \ } while (0) #endif -#define INIT_WORK(_work, _func) \ +#define __INIT_WORK(_work, _func, _onstack) \ do { \ - __INIT_WORK((_work), (_func), 0); \ + static __maybe_unused struct lock_class_key __key; \ + \ + __INIT_WORK_KEY(_work, _func, _onstack, &__key); \ } while (0) +#define INIT_WORK(_work, _func) \ + __INIT_WORK((_work), (_func), 0) + #define INIT_WORK_ONSTACK(_work, _func) \ - do { \ - __INIT_WORK((_work), (_func), 1); \ - } while (0) + __INIT_WORK((_work), (_func), 1) + +#define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \ + __INIT_WORK_KEY((_work), (_func), 1, _key) #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ - __setup_timer(&(_work)->timer, delayed_work_timer_fn, \ - (unsigned long)(_work), \ - (_tflags) | TIMER_IRQSAFE); \ + __timer_init(&(_work)->timer, \ + delayed_work_timer_fn, \ + (_tflags) | TIMER_IRQSAFE); \ } while (0) #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ - __setup_timer_on_stack(&(_work)->timer, \ - delayed_work_timer_fn, \ - (unsigned long)(_work), \ - (_tflags) | TIMER_IRQSAFE); \ + __timer_init_on_stack(&(_work)->timer, \ + delayed_work_timer_fn, \ + (_tflags) | TIMER_IRQSAFE); \ } while (0) #define INIT_DELAYED_WORK(_work, _func) \ @@ -268,6 +342,12 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func) \ __INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE) +#define INIT_RCU_WORK(_work, _func) \ + INIT_WORK(&(_work)->work, (_func)) + +#define INIT_RCU_WORK_ONSTACK(_work, _func) \ + INIT_WORK_ONSTACK(&(_work)->work, (_func)) + /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question @@ -278,30 +358,23 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } /** * delayed_work_pending - Find out whether a delayable work item is currently * pending - * @work: The work item in question + * @w: The work item in question */ #define delayed_work_pending(w) \ work_pending(&(w)->work) -/** - * work_clear_pending - for internal use only, mark a work item as not pending - * @work: The work item in question - */ -#define work_clear_pending(work) \ - clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) - /* * Workqueue flags and constants. For details, please refer to - * Documentation/workqueue.txt. + * Documentation/core-api/workqueue.rst. */ -enum { - WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ +enum wq_flags { + WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ - WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ - WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */ + WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */ + WQ_SYSFS = 1 << 6, /* visible in sysfs, see workqueue_sysfs_register() */ /* * Per-cpu workqueues are generally preferred because they tend to @@ -314,7 +387,7 @@ enum { * to execute and tries to keep idle cores idle to conserve power; * however, for example, a per-cpu work item scheduled from an * interrupt handler on an idle CPU will force the scheduler to - * excute the work item on that CPU breaking the idleness, which in + * execute the work item on that CPU breaking the idleness, which in * turn may lead to more scheduling choices which are sub-optimal * in terms of power consumption. * @@ -329,103 +402,155 @@ enum { * http://thread.gmane.org/gmane.linux.kernel/1480396 */ WQ_POWER_EFFICIENT = 1 << 7, + WQ_PERCPU = 1 << 8, /* bound to a specific cpu */ + __WQ_DESTROYING = 1 << 15, /* internal: workqueue is destroying */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ + __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ - WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ - WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ - WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, + /* BH wq only allows the following flags */ + __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI | WQ_PERCPU, }; -/* unbound wq's aren't per-cpu, scale max_active according to #cpus */ -#define WQ_UNBOUND_MAX_ACTIVE \ - max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU) +enum wq_consts { + WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */ + WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, + WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, + + /* + * Per-node default cap on min_active. Unless explicitly set, min_active + * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see + * workqueue_struct->min_active definition. + */ + WQ_DFL_MIN_ACTIVE = 8, +}; /* * System-wide workqueues which are always present. * - * system_wq is the one used by schedule[_delayed]_work[_on](). + * system_percpu_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. * - * system_long_wq is similar to system_wq but may host long running + * system_highpri_wq is similar to system_percpu_wq but for work items which + * require WQ_HIGHPRI. + * + * system_long_wq is similar to system_percpu_wq but may host long running * works. Queue flushing might take relatively long. * - * system_unbound_wq is unbound workqueue. Workers are not bound to + * system_dfl_wq is unbound workqueue. Workers are not bound to * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and * resources are available. * - * system_freezable_wq is equivalent to system_wq except that it's + * system_freezable_wq is equivalent to system_percpu_wq except that it's * freezable. * * *_power_efficient_wq are inclined towards saving power and converted * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, * they are same as their non-power-efficient counterparts - e.g. - * system_power_efficient_wq is identical to system_wq if + * system_power_efficient_wq is identical to system_percpu_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. + * + * system_bh[_highpri]_wq are convenience interface to softirq. BH work items + * are executed in the queueing CPU's BH context in the queueing order. */ -extern struct workqueue_struct *system_wq; +extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */ +extern struct workqueue_struct *system_percpu_wq; +extern struct workqueue_struct *system_highpri_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; +extern struct workqueue_struct *system_dfl_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; +extern struct workqueue_struct *system_bh_wq; +extern struct workqueue_struct *system_bh_highpri_wq; -static inline struct workqueue_struct * __deprecated __system_nrt_wq(void) -{ - return system_wq; -} - -static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void) -{ - return system_freezable_wq; -} - -/* equivlalent to system_wq and system_freezable_wq, deprecated */ -#define system_nrt_wq __system_nrt_wq() -#define system_nrt_freezable_wq __system_nrt_freezable_wq() - -extern struct workqueue_struct * -__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, - struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6); +void workqueue_softirq_action(bool highpri); +void workqueue_softirq_dead(unsigned int cpu); /** * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags * @max_active: max in-flight work items, 0 for default - * @args: args for @fmt + * @...: args for @fmt + * + * For a per-cpu workqueue, @max_active limits the number of in-flight work + * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be + * executing at most one work item for the workqueue. + * + * For unbound workqueues, @max_active limits the number of in-flight work items + * for the whole system. e.g. @max_active of 16 indicates that there can be + * at most 16 work items executing for the workqueue in the whole system. * - * Allocate a workqueue with the specified parameters. For detailed - * information on WQ_* flags, please refer to Documentation/workqueue.txt. + * As sharing the same active counter for an unbound workqueue across multiple + * NUMA nodes can be expensive, @max_active is distributed to each NUMA node + * according to the proportion of the number of online CPUs and enforced + * independently. * - * The __lock_name macro dance is to guarantee that single lock_class_key - * doesn't end up with different namesm, which isn't allowed by lockdep. + * Depending on online CPU distribution, a node may end up with per-node + * max_active which is significantly lower than @max_active, which can lead to + * deadlocks if the per-node concurrency limit is lower than the maximum number + * of interdependent work items for the workqueue. + * + * To guarantee forward progress regardless of online CPU distribution, the + * concurrency limit on every node is guaranteed to be equal to or greater than + * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means + * that the sum of per-node max_active's may be larger than @max_active. + * + * For detailed information on %WQ_\* flags, please refer to + * Documentation/core-api/workqueue.rst. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ +__printf(1, 4) struct workqueue_struct * +alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...); +#define alloc_workqueue(...) alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__)) + #ifdef CONFIG_LOCKDEP -#define alloc_workqueue(fmt, flags, max_active, args...) \ -({ \ - static struct lock_class_key __key; \ - const char *__lock_name; \ - \ - if (__builtin_constant_p(fmt)) \ - __lock_name = (fmt); \ - else \ - __lock_name = #fmt; \ - \ - __alloc_workqueue_key((fmt), (flags), (max_active), \ - &__key, __lock_name, ##args); \ -}) -#else -#define alloc_workqueue(fmt, flags, max_active, args...) \ - __alloc_workqueue_key((fmt), (flags), (max_active), \ - NULL, NULL, ##args) +/** + * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags + * @max_active: max in-flight work items, 0 for default + * @lockdep_map: user-defined lockdep_map + * @...: args for @fmt + * + * Same as alloc_workqueue but with the a user-define lockdep_map. Useful for + * workqueues created with the same purpose and to avoid leaking a lockdep_map + * on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(1, 5) struct workqueue_struct * +alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, + struct lockdep_map *lockdep_map, ...); + +/** + * alloc_ordered_workqueue_lockdep_map - allocate an ordered workqueue with + * user-defined lockdep_map + * + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) + * @lockdep_map: user-defined lockdep_map + * @args: args for @fmt + * + * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. + * Useful for workqueues created with the same purpose and to avoid leaking a + * lockdep_map on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +#define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ + alloc_hooks(alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags),\ + 1, lockdep_map, ##args)) #endif /** @@ -445,49 +570,75 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ - alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name)) + alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name)) #define create_freezable_workqueue(name) \ - alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \ - 1, (name)) + alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ + WQ_MEM_RECLAIM, 1, (name)) #define create_singlethread_workqueue(name) \ - alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, (name)) + alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) + +#define from_work(var, callback_work, work_fieldname) \ + container_of(callback_work, typeof(*var), work_fieldname) extern void destroy_workqueue(struct workqueue_struct *wq); -struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); +struct workqueue_attrs *alloc_workqueue_attrs_noprof(void); +#define alloc_workqueue_attrs(...) alloc_hooks(alloc_workqueue_attrs_noprof(__VA_ARGS__)) + void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); +extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); +extern bool queue_work_node(int node, struct workqueue_struct *wq, + struct work_struct *work); extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay); +extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork); -extern void flush_workqueue(struct workqueue_struct *wq); +extern void __flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); -extern void flush_scheduled_work(void); extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); +extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); +extern bool disable_work(struct work_struct *work); +extern bool disable_work_sync(struct work_struct *work); +extern bool enable_work(struct work_struct *work); + +extern bool disable_delayed_work(struct delayed_work *dwork); +extern bool disable_delayed_work_sync(struct delayed_work *dwork); +extern bool enable_delayed_work(struct delayed_work *dwork); + +extern bool flush_rcu_work(struct rcu_work *rwork); + extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern void workqueue_set_min_active(struct workqueue_struct *wq, + int min_active); +extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); extern void print_worker_info(const char *log_lvl, struct task_struct *task); +extern void show_all_workqueues(void); +extern void show_freezable_workqueues(void); +extern void show_one_workqueue(struct workqueue_struct *wq); +extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); /** * queue_work - queue work on a workqueue @@ -498,6 +649,19 @@ extern void print_worker_info(const char *log_lvl, struct task_struct *task); * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. + * + * Memory-ordering properties: If it returns %true, guarantees that all stores + * preceding the call to queue_work() in the program order will be visible from + * the CPU which will execute @work by the time such work executes, e.g., + * + * { x is initially 0 } + * + * CPU0 CPU1 + * + * WRITE_ONCE(x, 1); [ @work is being executed ] + * r0 = queue_work(wq, work); r1 = READ_ONCE(x); + * + * Forbids: r0 == true && r1 == 0 */ static inline bool queue_work(struct workqueue_struct *wq, struct work_struct *work) @@ -544,7 +708,7 @@ static inline bool mod_delayed_work(struct workqueue_struct *wq, */ static inline bool schedule_work_on(int cpu, struct work_struct *work) { - return queue_work_on(cpu, system_wq, work); + return queue_work_on(cpu, system_percpu_wq, work); } /** @@ -557,13 +721,81 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work) * This puts a job in the kernel-global workqueue if it was not already * queued and leaves it in the same position on the kernel-global * workqueue otherwise. + * + * Shares the same memory-ordering properties of queue_work(), cf. the + * DocBook header of queue_work(). */ static inline bool schedule_work(struct work_struct *work) { - return queue_work(system_wq, work); + return queue_work(system_percpu_wq, work); } /** + * enable_and_queue_work - Enable and queue a work item on a specific workqueue + * @wq: The target workqueue + * @work: The work item to be enabled and queued + * + * This function combines the operations of enable_work() and queue_work(), + * providing a convenient way to enable and queue a work item in a single call. + * It invokes enable_work() on @work and then queues it if the disable depth + * reached 0. Returns %true if the disable depth reached 0 and @work is queued, + * and %false otherwise. + * + * Note that @work is always queued when disable depth reaches zero. If the + * desired behavior is queueing only if certain events took place while @work is + * disabled, the user should implement the necessary state tracking and perform + * explicit conditional queueing after enable_work(). + */ +static inline bool enable_and_queue_work(struct workqueue_struct *wq, + struct work_struct *work) +{ + if (enable_work(work)) { + queue_work(wq, work); + return true; + } + return false; +} + +/* + * Detect attempt to flush system-wide workqueues at compile time when possible. + * Warn attempt to flush system-wide workqueues at runtime. + * + * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp + * for reasons and steps for converting system-wide workqueues into local workqueues. + */ +extern void __warn_flushing_systemwide_wq(void) + __compiletime_warning("Please avoid flushing system-wide workqueues."); + +/* Please stop using this function, for this function will be removed in near future. */ +#define flush_scheduled_work() \ +({ \ + __warn_flushing_systemwide_wq(); \ + __flush_workqueue(system_percpu_wq); \ +}) + +#define flush_workqueue(wq) \ +({ \ + struct workqueue_struct *_wq = (wq); \ + \ + if ((__builtin_constant_p(_wq == system_percpu_wq) && \ + _wq == system_percpu_wq) || \ + (__builtin_constant_p(_wq == system_highpri_wq) && \ + _wq == system_highpri_wq) || \ + (__builtin_constant_p(_wq == system_long_wq) && \ + _wq == system_long_wq) || \ + (__builtin_constant_p(_wq == system_dfl_wq) && \ + _wq == system_dfl_wq) || \ + (__builtin_constant_p(_wq == system_freezable_wq) && \ + _wq == system_freezable_wq) || \ + (__builtin_constant_p(_wq == system_power_efficient_wq) && \ + _wq == system_power_efficient_wq) || \ + (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \ + _wq == system_freezable_power_efficient_wq)) \ + __warn_flushing_systemwide_wq(); \ + __flush_workqueue(_wq); \ +}) + +/** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use * @dwork: job to be done @@ -575,7 +807,7 @@ static inline bool schedule_work(struct work_struct *work) static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work_on(cpu, system_wq, dwork, delay); + return queue_delayed_work_on(cpu, system_percpu_wq, dwork, delay); } /** @@ -589,42 +821,7 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, static inline bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work(system_wq, dwork, delay); -} - -/** - * keventd_up - is workqueue initialized yet? - */ -static inline bool keventd_up(void) -{ - return system_wq != NULL; -} - -/* - * Like above, but uses del_timer() instead of del_timer_sync(). This means, - * if it returns 0 the timer function may be running and the queueing is in - * progress. - */ -static inline bool __deprecated __cancel_delayed_work(struct delayed_work *work) -{ - bool ret; - - ret = del_timer(&work->timer); - if (ret) - work_clear_pending(&work->work); - return ret; -} - -/* used to be different but now identical to flush_work(), deprecated */ -static inline bool __deprecated flush_work_sync(struct work_struct *work) -{ - return flush_work(work); -} - -/* used to be different but now identical to flush_delayed_work(), deprecated */ -static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork) -{ - return flush_delayed_work(dwork); + return queue_delayed_work(system_percpu_wq, dwork, delay); } #ifndef CONFIG_SMP @@ -632,8 +829,24 @@ static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } +static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) +{ + return fn(arg); +} #else -long work_on_cpu(int cpu, long (*fn)(void *), void *arg); +long work_on_cpu_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_key(_cpu, _fn, _arg, &__key); \ +}) + #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER @@ -649,4 +862,20 @@ static inline int workqueue_sysfs_register(struct workqueue_struct *wq) { return 0; } #endif /* CONFIG_SYSFS */ +#ifdef CONFIG_WQ_WATCHDOG +void wq_watchdog_touch(int cpu); +#else /* CONFIG_WQ_WATCHDOG */ +static inline void wq_watchdog_touch(int cpu) { } +#endif /* CONFIG_WQ_WATCHDOG */ + +#ifdef CONFIG_SMP +int workqueue_prepare_cpu(unsigned int cpu); +int workqueue_online_cpu(unsigned int cpu); +int workqueue_offline_cpu(unsigned int cpu); +#endif + +void __init workqueue_init_early(void); +void __init workqueue_init(void); +void __init workqueue_init_topology(void); + #endif |
