summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c14
-rw-r--r--kernel/kmod.c9
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/workqueue.c178
-rw-r--r--kernel/workqueue_internal.h66
-rw-r--r--kernel/workqueue_sched.h9
6 files changed, 158 insertions, 120 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 6f34904a0b53..6c68fc3fae7b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel.
#include <linux/slab.h>
#include <linux/workqueue.h>
+#include "workqueue_internal.h"
+
static async_cookie_t next_cookie = 1;
#define MAX_WORK 32768
@@ -353,3 +355,15 @@ void async_synchronize_cookie(async_cookie_t cookie)
async_synchronize_cookie_domain(cookie, &async_running);
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+
+/**
+ * current_is_async - is %current an async worker task?
+ *
+ * Returns %true if %current is an async worker task.
+ */
+bool current_is_async(void)
+{
+ struct worker *worker = current_wq_worker();
+
+ return worker && worker->current_func == async_run_entry_fn;
+}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 0023a87e8de6..56dd34976d7b 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -38,6 +38,7 @@
#include <linux/suspend.h>
#include <linux/rwsem.h>
#include <linux/ptrace.h>
+#include <linux/async.h>
#include <asm/uaccess.h>
#include <trace/events/module.h>
@@ -130,6 +131,14 @@ int __request_module(bool wait, const char *fmt, ...)
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
static int kmod_loop_msg;
+ /*
+ * We don't allow synchronous module loading from async. Module
+ * init may invoke async_synchronize_full() which will end up
+ * waiting for this task which already is waiting for the module
+ * loading to complete, leading to a deadlock.
+ */
+ WARN_ON_ONCE(wait && current_is_async());
+
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 26058d0bebba..bfe8ae22f710 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,7 +83,7 @@
#endif
#include "sched.h"
-#include "../workqueue_sched.h"
+#include "../workqueue_internal.h"
#include "../smpboot.h"
#define CREATE_TRACE_POINTS
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fbc6576a83c3..2ffa240052fa 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,8 +41,9 @@
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
+#include <linux/hashtable.h>
-#include "workqueue_sched.h"
+#include "workqueue_internal.h"
enum {
/*
@@ -82,8 +83,6 @@ enum {
NR_WORKER_POOLS = 2, /* # worker pools per gcwq */
BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
- BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
- BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
@@ -123,33 +122,7 @@ enum {
* W: workqueue_lock protected.
*/
-struct global_cwq;
-struct worker_pool;
-
-/*
- * The poor guys doing the actual heavy lifting. All on-duty workers
- * are either serving the manager role, on idle list or on busy hash.
- */
-struct worker {
- /* on idle list while idle, on busy hash table while busy */
- union {
- struct list_head entry; /* L: while idle */
- struct hlist_node hentry; /* L: while busy */
- };
-
- struct work_struct *current_work; /* L: work being processed */
- struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
- struct list_head scheduled; /* L: scheduled works */
- struct task_struct *task; /* I: worker task */
- struct worker_pool *pool; /* I: the associated pool */
- /* 64 bytes boundary on 64bit, 32 on 32bit */
- unsigned long last_active; /* L: last active timestamp */
- unsigned int flags; /* X: flags */
- int id; /* I: worker id */
-
- /* for rebinding worker to CPU */
- struct work_struct rebind_work; /* L: for busy worker */
-};
+/* struct worker is defined in workqueue_internal.h */
struct worker_pool {
struct global_cwq *gcwq; /* I: the owning gcwq */
@@ -180,7 +153,7 @@ struct global_cwq {
unsigned int flags; /* L: GCWQ_* flags */
/* workers are chained either in busy_hash or pool idle_list */
- struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
+ DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
/* L: hash of busy workers */
struct worker_pool pools[NR_WORKER_POOLS];
@@ -285,8 +258,7 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
(pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++)
#define for_each_busy_worker(worker, i, pos, gcwq) \
- for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \
- hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
+ hash_for_each(gcwq->busy_hash, i, pos, worker, hentry)
static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
unsigned int sw)
@@ -764,12 +736,20 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
unsigned int cpu)
{
struct worker *worker = kthread_data(task), *to_wakeup = NULL;
- struct worker_pool *pool = worker->pool;
- atomic_t *nr_running = get_pool_nr_running(pool);
+ struct worker_pool *pool;
+ atomic_t *nr_running;
+ /*
+ * Rescuers, which may not have all the fields set up like normal
+ * workers, also reach here, let's not access anything before
+ * checking NOT_RUNNING.
+ */
if (worker->flags & WORKER_NOT_RUNNING)
return NULL;
+ pool = worker->pool;
+ nr_running = get_pool_nr_running(pool);
+
/* this can only happen on the local cpu */
BUG_ON(cpu != raw_smp_processor_id());
@@ -859,41 +839,31 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
}
/**
- * busy_worker_head - return the busy hash head for a work
- * @gcwq: gcwq of interest
- * @work: work to be hashed
- *
- * Return hash head of @gcwq for @work.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- *
- * RETURNS:
- * Pointer to the hash head.
- */
-static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
- struct work_struct *work)
-{
- const int base_shift = ilog2(sizeof(struct work_struct));
- unsigned long v = (unsigned long)work;
-
- /* simple shift and fold hash, do we need something better? */
- v >>= base_shift;
- v += v >> BUSY_WORKER_HASH_ORDER;
- v &= BUSY_WORKER_HASH_MASK;
-
- return &gcwq->busy_hash[v];
-}
-
-/**
- * __find_worker_executing_work - find worker which is executing a work
+ * find_worker_executing_work - find worker which is executing a work
* @gcwq: gcwq of interest
- * @bwh: hash head as returned by busy_worker_head()
* @work: work to find worker for
*
- * Find a worker which is executing @work on @gcwq. @bwh should be
- * the hash head obtained by calling busy_worker_head() with the same
- * work.
+ * Find a worker which is executing @work on @gcwq by searching
+ * @gcwq->busy_hash which is keyed by the address of @work. For a worker
+ * to match, its current execution should match the address of @work and
+ * its work function. This is to avoid unwanted dependency between
+ * unrelated work executions through a work item being recycled while still
+ * being executed.
+ *
+ * This is a bit tricky. A work item may be freed once its execution
+ * starts and nothing prevents the freed area from being recycled for
+ * another work item. If the same work item address ends up being reused
+ * before the original execution finishes, workqueue will identify the
+ * recycled work item as currently executing and make it wait until the
+ * current execution finishes, introducing an unwanted dependency.
+ *
+ * This function checks the work item address, work function and workqueue
+ * to avoid false positives. Note that this isn't complete as one may
+ * construct a work function which can introduce dependency onto itself
+ * through a recycled work item. Well, if somebody wants to shoot oneself
+ * in the foot that badly, there's only so much we can do, and if such
+ * deadlock actually occurs, it should be easy to locate the culprit work
+ * function.
*
* CONTEXT:
* spin_lock_irq(gcwq->lock).
@@ -902,40 +872,19 @@ static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
* Pointer to worker which is executing @work if found, NULL
* otherwise.
*/
-static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
- struct hlist_head *bwh,
- struct work_struct *work)
+static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
+ struct work_struct *work)
{
struct worker *worker;
struct hlist_node *tmp;
- hlist_for_each_entry(worker, tmp, bwh, hentry)
- if (worker->current_work == work)
+ hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry,
+ (unsigned long)work)
+ if (worker->current_work == work &&
+ worker->current_func == work->func)
return worker;
- return NULL;
-}
-/**
- * find_worker_executing_work - find worker which is executing a work
- * @gcwq: gcwq of interest
- * @work: work to find worker for
- *
- * Find a worker which is executing @work on @gcwq. This function is
- * identical to __find_worker_executing_work() except that this
- * function calculates @bwh itself.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- *
- * RETURNS:
- * Pointer to worker which is executing @work if found, NULL
- * otherwise.
- */
-static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
- struct work_struct *work)
-{
- return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
- work);
+ return NULL;
}
/**
@@ -2166,9 +2115,7 @@ __acquires(&gcwq->lock)
struct cpu_workqueue_struct *cwq = get_work_cwq(work);
struct worker_pool *pool = worker->pool;
struct global_cwq *gcwq = pool->gcwq;
- struct hlist_head *bwh = busy_worker_head(gcwq, work);
bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
- work_func_t f = work->func;
int work_color;
struct worker *collision;
#ifdef CONFIG_LOCKDEP
@@ -2198,7 +2145,7 @@ __acquires(&gcwq->lock)
* already processing the work. If so, defer the work to the
* currently executing one.
*/
- collision = __find_worker_executing_work(gcwq, bwh, work);
+ collision = find_worker_executing_work(gcwq, work);
if (unlikely(collision)) {
move_linked_works(work, &collision->scheduled, NULL);
return;
@@ -2206,8 +2153,9 @@ __acquires(&gcwq->lock)
/* claim and dequeue */
debug_work_deactivate(work);
- hlist_add_head(&worker->hentry, bwh);
+ hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)work);
worker->current_work = work;
+ worker->current_func = work->func;
worker->current_cwq = cwq;
work_color = get_work_color(work);
@@ -2240,7 +2188,7 @@ __acquires(&gcwq->lock)
lock_map_acquire_read(&cwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
trace_workqueue_execute_start(work);
- f(work);
+ worker->current_func(work);
/*
* While we must be careful to not use "work" after this, the trace
* point will only record its address.
@@ -2252,7 +2200,8 @@ __acquires(&gcwq->lock)
if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
" last function: %pf\n",
- current->comm, preempt_count(), task_pid_nr(current), f);
+ current->comm, preempt_count(), task_pid_nr(current),
+ worker->current_func);
debug_show_held_locks(current);
dump_stack();
}
@@ -2264,8 +2213,9 @@ __acquires(&gcwq->lock)
worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
/* we're done with it, release */
- hlist_del_init(&worker->hentry);
+ hash_del(&worker->hentry);
worker->current_work = NULL;
+ worker->current_func = NULL;
worker->current_cwq = NULL;
cwq_dec_nr_in_flight(cwq, work_color);
}
@@ -2388,7 +2338,7 @@ sleep:
/**
* rescuer_thread - the rescuer thread function
- * @__wq: the associated workqueue
+ * @__rescuer: self
*
* Workqueue rescuer thread function. There's one rescuer for each
* workqueue which has WQ_RESCUER set.
@@ -2405,20 +2355,27 @@ sleep:
*
* This should happen rarely.
*/
-static int rescuer_thread(void *__wq)
+static int rescuer_thread(void *__rescuer)
{
- struct workqueue_struct *wq = __wq;
- struct worker *rescuer = wq->rescuer;
+ struct worker *rescuer = __rescuer;
+ struct workqueue_struct *wq = rescuer->rescue_wq;
struct list_head *scheduled = &rescuer->scheduled;
bool is_unbound = wq->flags & WQ_UNBOUND;
unsigned int cpu;
set_user_nice(current, RESCUER_NICE_LEVEL);
+
+ /*
+ * Mark rescuer as worker too. As WORKER_PREP is never cleared, it
+ * doesn't participate in concurrency management.
+ */
+ rescuer->task->flags |= PF_WQ_WORKER;
repeat:
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop()) {
__set_current_state(TASK_RUNNING);
+ rescuer->task->flags &= ~PF_WQ_WORKER;
return 0;
}
@@ -2462,6 +2419,8 @@ repeat:
spin_unlock_irq(&gcwq->lock);
}
+ /* rescuers should never participate in concurrency management */
+ WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
schedule();
goto repeat;
}
@@ -3297,7 +3256,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
if (!rescuer)
goto err;
- rescuer->task = kthread_create(rescuer_thread, wq, "%s",
+ rescuer->rescue_wq = wq;
+ rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
wq->name);
if (IS_ERR(rescuer->task))
goto err;
@@ -3831,7 +3791,6 @@ out_unlock:
static int __init init_workqueues(void)
{
unsigned int cpu;
- int i;
/* make sure we have enough bits for OFFQ CPU number */
BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) <
@@ -3849,8 +3808,7 @@ static int __init init_workqueues(void)
gcwq->cpu = cpu;
gcwq->flags |= GCWQ_DISASSOCIATED;
- for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
- INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
+ hash_init(gcwq->busy_hash);
for_each_worker_pool(pool, gcwq) {
pool->gcwq = gcwq;
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
new file mode 100644
index 000000000000..cc35e7e62091
--- /dev/null
+++ b/kernel/workqueue_internal.h
@@ -0,0 +1,66 @@
+/*
+ * kernel/workqueue_internal.h
+ *
+ * Workqueue internal header file. Only to be included by workqueue and
+ * core kernel subsystems.
+ */
+#ifndef _KERNEL_WORKQUEUE_INTERNAL_H
+#define _KERNEL_WORKQUEUE_INTERNAL_H
+
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+struct global_cwq;
+struct worker_pool;
+
+/*
+ * The poor guys doing the actual heavy lifting. All on-duty workers are
+ * either serving the manager role, on idle list or on busy hash. For
+ * details on the locking annotation (L, I, X...), refer to workqueue.c.
+ *
+ * Only to be used in workqueue and async.
+ */
+struct worker {
+ /* on idle list while idle, on busy hash table while busy */
+ union {
+ struct list_head entry; /* L: while idle */
+ struct hlist_node hentry; /* L: while busy */
+ };
+
+ struct work_struct *current_work; /* L: work being processed */
+ work_func_t current_func; /* L: current_work's fn */
+ struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
+ struct list_head scheduled; /* L: scheduled works */
+ struct task_struct *task; /* I: worker task */
+ struct worker_pool *pool; /* I: the associated pool */
+ /* 64 bytes boundary on 64bit, 32 on 32bit */
+ unsigned long last_active; /* L: last active timestamp */
+ unsigned int flags; /* X: flags */
+ int id; /* I: worker id */
+
+ /* for rebinding worker to CPU */
+ struct work_struct rebind_work; /* L: for busy worker */
+
+ /* used only by rescuers to point to the target workqueue */
+ struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
+};
+
+/**
+ * current_wq_worker - return struct worker if %current is a workqueue worker
+ */
+static inline struct worker *current_wq_worker(void)
+{
+ if (current->flags & PF_WQ_WORKER)
+ return kthread_data(current);
+ return NULL;
+}
+
+/*
+ * Scheduler hooks for concurrency managed workqueue. Only to be used from
+ * sched.c and workqueue.c.
+ */
+void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
+struct task_struct *wq_worker_sleeping(struct task_struct *task,
+ unsigned int cpu);
+
+#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
deleted file mode 100644
index 2d10fc98dc79..000000000000
--- a/kernel/workqueue_sched.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * kernel/workqueue_sched.h
- *
- * Scheduler hooks for concurrency managed workqueue. Only to be
- * included from sched.c and workqueue.c.
- */
-void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
-struct task_struct *wq_worker_sleeping(struct task_struct *task,
- unsigned int cpu);