summaryrefslogtreecommitdiff
path: root/include/linux/workqueue.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/workqueue.h')
-rw-r--r--include/linux/workqueue.h661
1 files changed, 445 insertions, 216 deletions
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a0ed78ab54d7..dabc351cc127 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* workqueue.h --- work queue handling for Linux.
*/
@@ -5,19 +6,16 @@
#ifndef _LINUX_WORKQUEUE_H
#define _LINUX_WORKQUEUE_H
+#include <linux/alloc_tag.h>
#include <linux/timer.h>
#include <linux/linkage.h>
#include <linux/bitops.h>
#include <linux/lockdep.h>
#include <linux/threads.h>
#include <linux/atomic.h>
-#include <linux/cpumask.h>
-
-struct workqueue_struct;
-
-struct work_struct;
-typedef void (*work_func_t)(struct work_struct *work);
-void delayed_work_timer_fn(unsigned long __data);
+#include <linux/cpumask_types.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue_types.h>
/*
* The first word is the work queue pointer and the flags rolled into
@@ -25,90 +23,93 @@ void delayed_work_timer_fn(unsigned long __data);
*/
#define work_data_bits(work) ((unsigned long *)(&(work)->data))
-enum {
+enum work_bits {
WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
- WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */
- WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */
- WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */
+ WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */
+ WORK_STRUCT_PWQ_BIT, /* data points to pwq */
+ WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
- WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */
- WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */
-#else
- WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */
+ WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */
#endif
+ WORK_STRUCT_FLAG_BITS,
+ /* color for workqueue flushing */
+ WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS,
WORK_STRUCT_COLOR_BITS = 4,
- WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
- WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT,
- WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
- WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT,
-#ifdef CONFIG_DEBUG_OBJECTS_WORK
- WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT,
-#else
- WORK_STRUCT_STATIC = 0,
-#endif
-
/*
- * The last color is no color used for works which don't
- * participate in workqueue flushing.
+ * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/
+ * debugobjects turned off. This makes pwqs aligned to 256 bytes (512
+ * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors.
+ *
+ * MSB
+ * [ pwq pointer ] [ flush color ] [ STRUCT flags ]
+ * 4 bits 4 or 5 bits
*/
- WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1,
- WORK_NO_COLOR = WORK_NR_COLORS,
-
- /* special cpu IDs */
- WORK_CPU_UNBOUND = NR_CPUS,
- WORK_CPU_END = NR_CPUS + 1,
+ WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS,
/*
- * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
- * This makes pwqs aligned to 256 bytes and allows 15 workqueue
- * flush colors.
+ * data contains off-queue information when !WORK_STRUCT_PWQ.
+ *
+ * MSB
+ * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ]
+ * 16 bits 1 bit 4 or 5 bits
*/
- WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
- WORK_STRUCT_COLOR_BITS,
+ WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS,
+ WORK_OFFQ_BH_BIT = WORK_OFFQ_FLAG_SHIFT,
+ WORK_OFFQ_FLAG_END,
+ WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT,
- /* data contains off-queue information when !WORK_STRUCT_PWQ */
- WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT,
-
- WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE),
+ WORK_OFFQ_DISABLE_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS,
+ WORK_OFFQ_DISABLE_BITS = 16,
/*
- * When a work item is off queue, its high bits point to the last
- * pool it was on. Cap at 31 bits and use the highest number to
- * indicate that no pool is associated.
+ * When a work item is off queue, the high bits encode off-queue flags
+ * and the last pool it was on. Cap pool ID to 31 bits and use the
+ * highest number to indicate that no pool is associated.
*/
- WORK_OFFQ_FLAG_BITS = 1,
- WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+ WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_DISABLE_SHIFT + WORK_OFFQ_DISABLE_BITS,
WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
- WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1,
+};
+
+enum work_flags {
+ WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
+ WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT,
+ WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
+ WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT,
+#ifdef CONFIG_DEBUG_OBJECTS_WORK
+ WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT,
+#else
+ WORK_STRUCT_STATIC = 0,
+#endif
+};
- /* convenience constants */
- WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1,
- WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
- WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,
+enum wq_misc_consts {
+ WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS),
+
+ /* not bound to any CPU, prefer the local CPU */
+ WORK_CPU_UNBOUND = NR_CPUS,
/* bit mask for work_busy() return values */
WORK_BUSY_PENDING = 1 << 0,
WORK_BUSY_RUNNING = 1 << 1,
/* maximum string length for set_worker_desc() */
- WORKER_DESC_LEN = 24,
+ WORKER_DESC_LEN = 32,
};
-struct work_struct {
- atomic_long_t data;
- struct list_head entry;
- work_func_t func;
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
-#endif
-};
+/* Convenience constants - of type 'unsigned long', not 'enum'! */
+#define WORK_OFFQ_BH (1ul << WORK_OFFQ_BH_BIT)
+#define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT)
+#define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT)
+#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1)
+#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT)
+#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1))
-#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL)
+#define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
#define WORK_DATA_STATIC_INIT() \
- ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
+ ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC))
struct delayed_work {
struct work_struct work;
@@ -119,18 +120,93 @@ struct delayed_work {
int cpu;
};
-/*
- * A struct for workqueue attributes. This can be used to change
- * attributes of an unbound workqueue.
+struct rcu_work {
+ struct work_struct work;
+ struct rcu_head rcu;
+
+ /* target workqueue ->rcu uses to queue ->work */
+ struct workqueue_struct *wq;
+};
+
+enum wq_affn_scope {
+ WQ_AFFN_DFL, /* use system default */
+ WQ_AFFN_CPU, /* one pod per CPU */
+ WQ_AFFN_SMT, /* one pod poer SMT */
+ WQ_AFFN_CACHE, /* one pod per LLC */
+ WQ_AFFN_NUMA, /* one pod per NUMA node */
+ WQ_AFFN_SYSTEM, /* one pod across the whole system */
+
+ WQ_AFFN_NR_TYPES,
+};
+
+/**
+ * struct workqueue_attrs - A struct for workqueue attributes.
*
- * Unlike other fields, ->no_numa isn't a property of a worker_pool. It
- * only modifies how apply_workqueue_attrs() select pools and thus doesn't
- * participate in pool hash calculations or equality comparisons.
+ * This can be used to change attributes of an unbound workqueue.
*/
struct workqueue_attrs {
- int nice; /* nice level */
- cpumask_var_t cpumask; /* allowed CPUs */
- bool no_numa; /* disable NUMA affinity */
+ /**
+ * @nice: nice level
+ */
+ int nice;
+
+ /**
+ * @cpumask: allowed CPUs
+ *
+ * Work items in this workqueue are affine to these CPUs and not allowed
+ * to execute on other CPUs. A pool serving a workqueue must have the
+ * same @cpumask.
+ */
+ cpumask_var_t cpumask;
+
+ /**
+ * @__pod_cpumask: internal attribute used to create per-pod pools
+ *
+ * Internal use only.
+ *
+ * Per-pod unbound worker pools are used to improve locality. Always a
+ * subset of ->cpumask. A workqueue can be associated with multiple
+ * worker pools with disjoint @__pod_cpumask's. Whether the enforcement
+ * of a pool's @__pod_cpumask is strict depends on @affn_strict.
+ */
+ cpumask_var_t __pod_cpumask;
+
+ /**
+ * @affn_strict: affinity scope is strict
+ *
+ * If clear, workqueue will make a best-effort attempt at starting the
+ * worker inside @__pod_cpumask but the scheduler is free to migrate it
+ * outside.
+ *
+ * If set, workers are only allowed to run inside @__pod_cpumask.
+ */
+ bool affn_strict;
+
+ /*
+ * Below fields aren't properties of a worker_pool. They only modify how
+ * :c:func:`apply_workqueue_attrs` select pools and thus don't
+ * participate in pool hash calculations or equality comparisons.
+ *
+ * If @affn_strict is set, @cpumask isn't a property of a worker_pool
+ * either.
+ */
+
+ /**
+ * @affn_scope: unbound CPU affinity scope
+ *
+ * CPU pods are used to improve execution locality of unbound work
+ * items. There are multiple pod types, one for each wq_affn_scope, and
+ * every CPU in the system belongs to one pod in every pod type. CPUs
+ * that belong to the same pod share the worker pool. For example,
+ * selecting %WQ_AFFN_NUMA makes the workqueue use a separate worker
+ * pool for each NUMA node.
+ */
+ enum wq_affn_scope affn_scope;
+
+ /**
+ * @ordered: work items must be executed one by one in queueing order
+ */
+ bool ordered;
};
static inline struct delayed_work *to_delayed_work(struct work_struct *work)
@@ -138,6 +214,11 @@ static inline struct delayed_work *to_delayed_work(struct work_struct *work)
return container_of(work, struct delayed_work, work);
}
+static inline struct rcu_work *to_rcu_work(struct work_struct *work)
+{
+ return container_of(work, struct rcu_work, work);
+}
+
struct execute_work {
struct work_struct work;
};
@@ -163,8 +244,7 @@ struct execute_work {
#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
.work = __WORK_INITIALIZER((n).work, (f)), \
- .timer = __TIMER_INITIALIZER(delayed_work_timer_fn, \
- 0, (unsigned long)&(n), \
+ .timer = __TIMER_INITIALIZER(delayed_work_timer_fn,\
(tflags) | TIMER_IRQSAFE), \
}
@@ -177,20 +257,10 @@ struct execute_work {
#define DECLARE_DEFERRABLE_WORK(n, f) \
struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE)
-/*
- * initialize a work item's function pointer
- */
-#define PREPARE_WORK(_work, _func) \
- do { \
- (_work)->func = (_func); \
- } while (0)
-
-#define PREPARE_DELAYED_WORK(_work, _func) \
- PREPARE_WORK(&(_work)->work, (_func))
-
#ifdef CONFIG_DEBUG_OBJECTS_WORK
extern void __init_work(struct work_struct *work, int onstack);
extern void destroy_work_on_stack(struct work_struct *work);
+extern void destroy_delayed_work_on_stack(struct delayed_work *work);
static inline unsigned int work_static(struct work_struct *work)
{
return *work_data_bits(work) & WORK_STRUCT_STATIC;
@@ -198,6 +268,7 @@ static inline unsigned int work_static(struct work_struct *work)
#else
static inline void __init_work(struct work_struct *work, int onstack) { }
static inline void destroy_work_on_stack(struct work_struct *work) { }
+static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { }
static inline unsigned int work_static(struct work_struct *work) { return 0; }
#endif
@@ -209,51 +280,54 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
* to generate better code.
*/
#ifdef CONFIG_LOCKDEP
-#define __INIT_WORK(_work, _func, _onstack) \
+#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \
do { \
- static struct lock_class_key __key; \
- \
__init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
- lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \
+ lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \
INIT_LIST_HEAD(&(_work)->entry); \
- PREPARE_WORK((_work), (_func)); \
+ (_work)->func = (_func); \
} while (0)
#else
-#define __INIT_WORK(_work, _func, _onstack) \
+#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \
do { \
__init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
INIT_LIST_HEAD(&(_work)->entry); \
- PREPARE_WORK((_work), (_func)); \
+ (_work)->func = (_func); \
} while (0)
#endif
-#define INIT_WORK(_work, _func) \
+#define __INIT_WORK(_work, _func, _onstack) \
do { \
- __INIT_WORK((_work), (_func), 0); \
+ static __maybe_unused struct lock_class_key __key; \
+ \
+ __INIT_WORK_KEY(_work, _func, _onstack, &__key); \
} while (0)
+#define INIT_WORK(_work, _func) \
+ __INIT_WORK((_work), (_func), 0)
+
#define INIT_WORK_ONSTACK(_work, _func) \
- do { \
- __INIT_WORK((_work), (_func), 1); \
- } while (0)
+ __INIT_WORK((_work), (_func), 1)
+
+#define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \
+ __INIT_WORK_KEY((_work), (_func), 1, _key)
#define __INIT_DELAYED_WORK(_work, _func, _tflags) \
do { \
INIT_WORK(&(_work)->work, (_func)); \
- __setup_timer(&(_work)->timer, delayed_work_timer_fn, \
- (unsigned long)(_work), \
- (_tflags) | TIMER_IRQSAFE); \
+ __timer_init(&(_work)->timer, \
+ delayed_work_timer_fn, \
+ (_tflags) | TIMER_IRQSAFE); \
} while (0)
#define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \
do { \
INIT_WORK_ONSTACK(&(_work)->work, (_func)); \
- __setup_timer_on_stack(&(_work)->timer, \
- delayed_work_timer_fn, \
- (unsigned long)(_work), \
- (_tflags) | TIMER_IRQSAFE); \
+ __timer_init_on_stack(&(_work)->timer, \
+ delayed_work_timer_fn, \
+ (_tflags) | TIMER_IRQSAFE); \
} while (0)
#define INIT_DELAYED_WORK(_work, _func) \
@@ -268,6 +342,12 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
#define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func) \
__INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE)
+#define INIT_RCU_WORK(_work, _func) \
+ INIT_WORK(&(_work)->work, (_func))
+
+#define INIT_RCU_WORK_ONSTACK(_work, _func) \
+ INIT_WORK_ONSTACK(&(_work)->work, (_func))
+
/**
* work_pending - Find out whether a work item is currently pending
* @work: The work item in question
@@ -278,30 +358,23 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
/**
* delayed_work_pending - Find out whether a delayable work item is currently
* pending
- * @work: The work item in question
+ * @w: The work item in question
*/
#define delayed_work_pending(w) \
work_pending(&(w)->work)
-/**
- * work_clear_pending - for internal use only, mark a work item as not pending
- * @work: The work item in question
- */
-#define work_clear_pending(work) \
- clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
-
/*
* Workqueue flags and constants. For details, please refer to
- * Documentation/workqueue.txt.
+ * Documentation/core-api/workqueue.rst.
*/
-enum {
- WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */
+enum wq_flags {
+ WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */
WQ_UNBOUND = 1 << 1, /* not bound to any cpu */
WQ_FREEZABLE = 1 << 2, /* freeze during suspend */
WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */
WQ_HIGHPRI = 1 << 4, /* high priority */
- WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */
- WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */
+ WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */
+ WQ_SYSFS = 1 << 6, /* visible in sysfs, see workqueue_sysfs_register() */
/*
* Per-cpu workqueues are generally preferred because they tend to
@@ -314,7 +387,7 @@ enum {
* to execute and tries to keep idle cores idle to conserve power;
* however, for example, a per-cpu work item scheduled from an
* interrupt handler on an idle CPU will force the scheduler to
- * excute the work item on that CPU breaking the idleness, which in
+ * execute the work item on that CPU breaking the idleness, which in
* turn may lead to more scheduling choices which are sub-optimal
* in terms of power consumption.
*
@@ -329,103 +402,155 @@ enum {
* http://thread.gmane.org/gmane.linux.kernel/1480396
*/
WQ_POWER_EFFICIENT = 1 << 7,
+ WQ_PERCPU = 1 << 8, /* bound to a specific cpu */
+ __WQ_DESTROYING = 1 << 15, /* internal: workqueue is destroying */
__WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
__WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
+ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */
- WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
- WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */
- WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2,
+ /* BH wq only allows the following flags */
+ __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI | WQ_PERCPU,
};
-/* unbound wq's aren't per-cpu, scale max_active according to #cpus */
-#define WQ_UNBOUND_MAX_ACTIVE \
- max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU)
+enum wq_consts {
+ WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */
+ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE,
+ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2,
+
+ /*
+ * Per-node default cap on min_active. Unless explicitly set, min_active
+ * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see
+ * workqueue_struct->min_active definition.
+ */
+ WQ_DFL_MIN_ACTIVE = 8,
+};
/*
* System-wide workqueues which are always present.
*
- * system_wq is the one used by schedule[_delayed]_work[_on]().
+ * system_percpu_wq is the one used by schedule[_delayed]_work[_on]().
* Multi-CPU multi-threaded. There are users which expect relatively
* short queue flush time. Don't queue works which can run for too
* long.
*
- * system_long_wq is similar to system_wq but may host long running
+ * system_highpri_wq is similar to system_percpu_wq but for work items which
+ * require WQ_HIGHPRI.
+ *
+ * system_long_wq is similar to system_percpu_wq but may host long running
* works. Queue flushing might take relatively long.
*
- * system_unbound_wq is unbound workqueue. Workers are not bound to
+ * system_dfl_wq is unbound workqueue. Workers are not bound to
* any specific CPU, not concurrency managed, and all queued works are
* executed immediately as long as max_active limit is not reached and
* resources are available.
*
- * system_freezable_wq is equivalent to system_wq except that it's
+ * system_freezable_wq is equivalent to system_percpu_wq except that it's
* freezable.
*
* *_power_efficient_wq are inclined towards saving power and converted
* into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
* they are same as their non-power-efficient counterparts - e.g.
- * system_power_efficient_wq is identical to system_wq if
+ * system_power_efficient_wq is identical to system_percpu_wq if
* 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
+ *
+ * system_bh[_highpri]_wq are convenience interface to softirq. BH work items
+ * are executed in the queueing CPU's BH context in the queueing order.
*/
-extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */
+extern struct workqueue_struct *system_percpu_wq;
+extern struct workqueue_struct *system_highpri_wq;
extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_unbound_wq;
+extern struct workqueue_struct *system_dfl_wq;
extern struct workqueue_struct *system_freezable_wq;
extern struct workqueue_struct *system_power_efficient_wq;
extern struct workqueue_struct *system_freezable_power_efficient_wq;
+extern struct workqueue_struct *system_bh_wq;
+extern struct workqueue_struct *system_bh_highpri_wq;
-static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
-{
- return system_wq;
-}
-
-static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void)
-{
- return system_freezable_wq;
-}
-
-/* equivlalent to system_wq and system_freezable_wq, deprecated */
-#define system_nrt_wq __system_nrt_wq()
-#define system_nrt_freezable_wq __system_nrt_freezable_wq()
-
-extern struct workqueue_struct *
-__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
- struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
+void workqueue_softirq_action(bool highpri);
+void workqueue_softirq_dead(unsigned int cpu);
/**
* alloc_workqueue - allocate a workqueue
* @fmt: printf format for the name of the workqueue
* @flags: WQ_* flags
* @max_active: max in-flight work items, 0 for default
- * @args: args for @fmt
+ * @...: args for @fmt
+ *
+ * For a per-cpu workqueue, @max_active limits the number of in-flight work
+ * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be
+ * executing at most one work item for the workqueue.
+ *
+ * For unbound workqueues, @max_active limits the number of in-flight work items
+ * for the whole system. e.g. @max_active of 16 indicates that there can be
+ * at most 16 work items executing for the workqueue in the whole system.
*
- * Allocate a workqueue with the specified parameters. For detailed
- * information on WQ_* flags, please refer to Documentation/workqueue.txt.
+ * As sharing the same active counter for an unbound workqueue across multiple
+ * NUMA nodes can be expensive, @max_active is distributed to each NUMA node
+ * according to the proportion of the number of online CPUs and enforced
+ * independently.
*
- * The __lock_name macro dance is to guarantee that single lock_class_key
- * doesn't end up with different namesm, which isn't allowed by lockdep.
+ * Depending on online CPU distribution, a node may end up with per-node
+ * max_active which is significantly lower than @max_active, which can lead to
+ * deadlocks if the per-node concurrency limit is lower than the maximum number
+ * of interdependent work items for the workqueue.
+ *
+ * To guarantee forward progress regardless of online CPU distribution, the
+ * concurrency limit on every node is guaranteed to be equal to or greater than
+ * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means
+ * that the sum of per-node max_active's may be larger than @max_active.
+ *
+ * For detailed information on %WQ_\* flags, please refer to
+ * Documentation/core-api/workqueue.rst.
*
* RETURNS:
* Pointer to the allocated workqueue on success, %NULL on failure.
*/
+__printf(1, 4) struct workqueue_struct *
+alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...);
+#define alloc_workqueue(...) alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__))
+
#ifdef CONFIG_LOCKDEP
-#define alloc_workqueue(fmt, flags, max_active, args...) \
-({ \
- static struct lock_class_key __key; \
- const char *__lock_name; \
- \
- if (__builtin_constant_p(fmt)) \
- __lock_name = (fmt); \
- else \
- __lock_name = #fmt; \
- \
- __alloc_workqueue_key((fmt), (flags), (max_active), \
- &__key, __lock_name, ##args); \
-})
-#else
-#define alloc_workqueue(fmt, flags, max_active, args...) \
- __alloc_workqueue_key((fmt), (flags), (max_active), \
- NULL, NULL, ##args)
+/**
+ * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map
+ * @fmt: printf format for the name of the workqueue
+ * @flags: WQ_* flags
+ * @max_active: max in-flight work items, 0 for default
+ * @lockdep_map: user-defined lockdep_map
+ * @...: args for @fmt
+ *
+ * Same as alloc_workqueue but with the a user-define lockdep_map. Useful for
+ * workqueues created with the same purpose and to avoid leaking a lockdep_map
+ * on each workqueue creation.
+ *
+ * RETURNS:
+ * Pointer to the allocated workqueue on success, %NULL on failure.
+ */
+__printf(1, 5) struct workqueue_struct *
+alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active,
+ struct lockdep_map *lockdep_map, ...);
+
+/**
+ * alloc_ordered_workqueue_lockdep_map - allocate an ordered workqueue with
+ * user-defined lockdep_map
+ *
+ * @fmt: printf format for the name of the workqueue
+ * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
+ * @lockdep_map: user-defined lockdep_map
+ * @args: args for @fmt
+ *
+ * Same as alloc_ordered_workqueue but with the a user-define lockdep_map.
+ * Useful for workqueues created with the same purpose and to avoid leaking a
+ * lockdep_map on each workqueue creation.
+ *
+ * RETURNS:
+ * Pointer to the allocated workqueue on success, %NULL on failure.
+ */
+#define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \
+ alloc_hooks(alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags),\
+ 1, lockdep_map, ##args))
#endif
/**
@@ -445,49 +570,75 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
#define create_workqueue(name) \
- alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name))
#define create_freezable_workqueue(name) \
- alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
- 1, (name))
+ alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+ WQ_MEM_RECLAIM, 1, (name))
#define create_singlethread_workqueue(name) \
- alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, (name))
+ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
+
+#define from_work(var, callback_work, work_fieldname) \
+ container_of(callback_work, typeof(*var), work_fieldname)
extern void destroy_workqueue(struct workqueue_struct *wq);
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
+struct workqueue_attrs *alloc_workqueue_attrs_noprof(void);
+#define alloc_workqueue_attrs(...) alloc_hooks(alloc_workqueue_attrs_noprof(__VA_ARGS__))
+
void free_workqueue_attrs(struct workqueue_attrs *attrs);
int apply_workqueue_attrs(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs);
+extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask);
extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work);
+extern bool queue_work_node(int node, struct workqueue_struct *wq,
+ struct work_struct *work);
extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *work, unsigned long delay);
extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *dwork, unsigned long delay);
+extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork);
-extern void flush_workqueue(struct workqueue_struct *wq);
+extern void __flush_workqueue(struct workqueue_struct *wq);
extern void drain_workqueue(struct workqueue_struct *wq);
-extern void flush_scheduled_work(void);
extern int schedule_on_each_cpu(work_func_t func);
int execute_in_process_context(work_func_t fn, struct execute_work *);
extern bool flush_work(struct work_struct *work);
+extern bool cancel_work(struct work_struct *work);
extern bool cancel_work_sync(struct work_struct *work);
extern bool flush_delayed_work(struct delayed_work *dwork);
extern bool cancel_delayed_work(struct delayed_work *dwork);
extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
+extern bool disable_work(struct work_struct *work);
+extern bool disable_work_sync(struct work_struct *work);
+extern bool enable_work(struct work_struct *work);
+
+extern bool disable_delayed_work(struct delayed_work *dwork);
+extern bool disable_delayed_work_sync(struct delayed_work *dwork);
+extern bool enable_delayed_work(struct delayed_work *dwork);
+
+extern bool flush_rcu_work(struct rcu_work *rwork);
+
extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active);
+extern void workqueue_set_min_active(struct workqueue_struct *wq,
+ int min_active);
+extern struct work_struct *current_work(void);
extern bool current_is_workqueue_rescuer(void);
extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
extern unsigned int work_busy(struct work_struct *work);
extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
extern void print_worker_info(const char *log_lvl, struct task_struct *task);
+extern void show_all_workqueues(void);
+extern void show_freezable_workqueues(void);
+extern void show_one_workqueue(struct workqueue_struct *wq);
+extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);
/**
* queue_work - queue work on a workqueue
@@ -498,6 +649,19 @@ extern void print_worker_info(const char *log_lvl, struct task_struct *task);
*
* We queue the work to the CPU on which it was submitted, but if the CPU dies
* it can be processed by another CPU.
+ *
+ * Memory-ordering properties: If it returns %true, guarantees that all stores
+ * preceding the call to queue_work() in the program order will be visible from
+ * the CPU which will execute @work by the time such work executes, e.g.,
+ *
+ * { x is initially 0 }
+ *
+ * CPU0 CPU1
+ *
+ * WRITE_ONCE(x, 1); [ @work is being executed ]
+ * r0 = queue_work(wq, work); r1 = READ_ONCE(x);
+ *
+ * Forbids: r0 == true && r1 == 0
*/
static inline bool queue_work(struct workqueue_struct *wq,
struct work_struct *work)
@@ -544,7 +708,7 @@ static inline bool mod_delayed_work(struct workqueue_struct *wq,
*/
static inline bool schedule_work_on(int cpu, struct work_struct *work)
{
- return queue_work_on(cpu, system_wq, work);
+ return queue_work_on(cpu, system_percpu_wq, work);
}
/**
@@ -557,13 +721,81 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
* This puts a job in the kernel-global workqueue if it was not already
* queued and leaves it in the same position on the kernel-global
* workqueue otherwise.
+ *
+ * Shares the same memory-ordering properties of queue_work(), cf. the
+ * DocBook header of queue_work().
*/
static inline bool schedule_work(struct work_struct *work)
{
- return queue_work(system_wq, work);
+ return queue_work(system_percpu_wq, work);
}
/**
+ * enable_and_queue_work - Enable and queue a work item on a specific workqueue
+ * @wq: The target workqueue
+ * @work: The work item to be enabled and queued
+ *
+ * This function combines the operations of enable_work() and queue_work(),
+ * providing a convenient way to enable and queue a work item in a single call.
+ * It invokes enable_work() on @work and then queues it if the disable depth
+ * reached 0. Returns %true if the disable depth reached 0 and @work is queued,
+ * and %false otherwise.
+ *
+ * Note that @work is always queued when disable depth reaches zero. If the
+ * desired behavior is queueing only if certain events took place while @work is
+ * disabled, the user should implement the necessary state tracking and perform
+ * explicit conditional queueing after enable_work().
+ */
+static inline bool enable_and_queue_work(struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ if (enable_work(work)) {
+ queue_work(wq, work);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Detect attempt to flush system-wide workqueues at compile time when possible.
+ * Warn attempt to flush system-wide workqueues at runtime.
+ *
+ * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
+ * for reasons and steps for converting system-wide workqueues into local workqueues.
+ */
+extern void __warn_flushing_systemwide_wq(void)
+ __compiletime_warning("Please avoid flushing system-wide workqueues.");
+
+/* Please stop using this function, for this function will be removed in near future. */
+#define flush_scheduled_work() \
+({ \
+ __warn_flushing_systemwide_wq(); \
+ __flush_workqueue(system_percpu_wq); \
+})
+
+#define flush_workqueue(wq) \
+({ \
+ struct workqueue_struct *_wq = (wq); \
+ \
+ if ((__builtin_constant_p(_wq == system_percpu_wq) && \
+ _wq == system_percpu_wq) || \
+ (__builtin_constant_p(_wq == system_highpri_wq) && \
+ _wq == system_highpri_wq) || \
+ (__builtin_constant_p(_wq == system_long_wq) && \
+ _wq == system_long_wq) || \
+ (__builtin_constant_p(_wq == system_dfl_wq) && \
+ _wq == system_dfl_wq) || \
+ (__builtin_constant_p(_wq == system_freezable_wq) && \
+ _wq == system_freezable_wq) || \
+ (__builtin_constant_p(_wq == system_power_efficient_wq) && \
+ _wq == system_power_efficient_wq) || \
+ (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \
+ _wq == system_freezable_power_efficient_wq)) \
+ __warn_flushing_systemwide_wq(); \
+ __flush_workqueue(_wq); \
+})
+
+/**
* schedule_delayed_work_on - queue work in global workqueue on CPU after delay
* @cpu: cpu to use
* @dwork: job to be done
@@ -575,7 +807,7 @@ static inline bool schedule_work(struct work_struct *work)
static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
unsigned long delay)
{
- return queue_delayed_work_on(cpu, system_wq, dwork, delay);
+ return queue_delayed_work_on(cpu, system_percpu_wq, dwork, delay);
}
/**
@@ -589,42 +821,7 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
static inline bool schedule_delayed_work(struct delayed_work *dwork,
unsigned long delay)
{
- return queue_delayed_work(system_wq, dwork, delay);
-}
-
-/**
- * keventd_up - is workqueue initialized yet?
- */
-static inline bool keventd_up(void)
-{
- return system_wq != NULL;
-}
-
-/*
- * Like above, but uses del_timer() instead of del_timer_sync(). This means,
- * if it returns 0 the timer function may be running and the queueing is in
- * progress.
- */
-static inline bool __deprecated __cancel_delayed_work(struct delayed_work *work)
-{
- bool ret;
-
- ret = del_timer(&work->timer);
- if (ret)
- work_clear_pending(&work->work);
- return ret;
-}
-
-/* used to be different but now identical to flush_work(), deprecated */
-static inline bool __deprecated flush_work_sync(struct work_struct *work)
-{
- return flush_work(work);
-}
-
-/* used to be different but now identical to flush_delayed_work(), deprecated */
-static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork)
-{
- return flush_delayed_work(dwork);
+ return queue_delayed_work(system_percpu_wq, dwork, delay);
}
#ifndef CONFIG_SMP
@@ -632,8 +829,24 @@ static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
{
return fn(arg);
}
+static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
+{
+ return fn(arg);
+}
#else
-long work_on_cpu(int cpu, long (*fn)(void *), void *arg);
+long work_on_cpu_key(int cpu, long (*fn)(void *),
+ void *arg, struct lock_class_key *key);
+/*
+ * A new key is defined for each caller to make sure the work
+ * associated with the function doesn't share its locking class.
+ */
+#define work_on_cpu(_cpu, _fn, _arg) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ work_on_cpu_key(_cpu, _fn, _arg, &__key); \
+})
+
#endif /* CONFIG_SMP */
#ifdef CONFIG_FREEZER
@@ -649,4 +862,20 @@ static inline int workqueue_sysfs_register(struct workqueue_struct *wq)
{ return 0; }
#endif /* CONFIG_SYSFS */
+#ifdef CONFIG_WQ_WATCHDOG
+void wq_watchdog_touch(int cpu);
+#else /* CONFIG_WQ_WATCHDOG */
+static inline void wq_watchdog_touch(int cpu) { }
+#endif /* CONFIG_WQ_WATCHDOG */
+
+#ifdef CONFIG_SMP
+int workqueue_prepare_cpu(unsigned int cpu);
+int workqueue_online_cpu(unsigned int cpu);
+int workqueue_offline_cpu(unsigned int cpu);
+#endif
+
+void __init workqueue_init_early(void);
+void __init workqueue_init(void);
+void __init workqueue_init_topology(void);
+
#endif