summaryrefslogtreecommitdiff
path: root/kernel/kthread.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/kthread.c')
-rw-r--r--kernel/kthread.c230
1 files changed, 201 insertions, 29 deletions
diff --git a/kernel/kthread.c b/kernel/kthread.c
index c5e40830c1f2..85fc068f0083 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -35,6 +35,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
+static LIST_HEAD(kthreads_hotplug);
+static DEFINE_MUTEX(kthreads_hotplug_lock);
+
struct kthread_create_info
{
/* Information passed to kthread() from kthreadd. */
@@ -53,6 +56,8 @@ struct kthread_create_info
struct kthread {
unsigned long flags;
unsigned int cpu;
+ unsigned int node;
+ int started;
int result;
int (*threadfn)(void *);
void *data;
@@ -63,6 +68,9 @@ struct kthread {
#endif
/* To store the full name if task comm is truncated. */
char *full_name;
+ struct task_struct *task;
+ struct list_head hotplug_node;
+ struct cpumask *preferred_affinity;
};
enum KTHREAD_BITS {
@@ -101,7 +109,7 @@ void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
struct kthread *kthread = to_kthread(tsk);
if (!kthread || !kthread->full_name) {
- __get_task_comm(buf, buf_size, tsk);
+ strscpy(buf, tsk->comm, buf_size);
return;
}
@@ -121,8 +129,11 @@ bool set_kthread_struct(struct task_struct *p)
init_completion(&kthread->exited);
init_completion(&kthread->parked);
+ INIT_LIST_HEAD(&kthread->hotplug_node);
p->vfork_done = &kthread->exited;
+ kthread->task = p;
+ kthread->node = tsk_fork_get_node(current);
p->worker_private = kthread;
return true;
}
@@ -313,8 +324,19 @@ void __noreturn kthread_exit(long result)
{
struct kthread *kthread = to_kthread(current);
kthread->result = result;
+ if (!list_empty(&kthread->hotplug_node)) {
+ mutex_lock(&kthreads_hotplug_lock);
+ list_del(&kthread->hotplug_node);
+ mutex_unlock(&kthreads_hotplug_lock);
+
+ if (kthread->preferred_affinity) {
+ kfree(kthread->preferred_affinity);
+ kthread->preferred_affinity = NULL;
+ }
+ }
do_exit(0);
}
+EXPORT_SYMBOL(kthread_exit);
/**
* kthread_complete_and_exit - Exit the current kthread.
@@ -337,6 +359,56 @@ void __noreturn kthread_complete_and_exit(struct completion *comp, long code)
}
EXPORT_SYMBOL(kthread_complete_and_exit);
+static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpumask)
+{
+ const struct cpumask *pref;
+
+ if (kthread->preferred_affinity) {
+ pref = kthread->preferred_affinity;
+ } else {
+ if (WARN_ON_ONCE(kthread->node == NUMA_NO_NODE))
+ return;
+ pref = cpumask_of_node(kthread->node);
+ }
+
+ cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_KTHREAD));
+ if (cpumask_empty(cpumask))
+ cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_KTHREAD));
+}
+
+static void kthread_affine_node(void)
+{
+ struct kthread *kthread = to_kthread(current);
+ cpumask_var_t affinity;
+
+ WARN_ON_ONCE(kthread_is_per_cpu(current));
+
+ if (kthread->node == NUMA_NO_NODE) {
+ housekeeping_affine(current, HK_TYPE_KTHREAD);
+ } else {
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ mutex_lock(&kthreads_hotplug_lock);
+ WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
+ list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
+ /*
+ * The node cpumask is racy when read from kthread() but:
+ * - a racing CPU going down will either fail on the subsequent
+ * call to set_cpus_allowed_ptr() or be migrated to housekeepers
+ * afterwards by the scheduler.
+ * - a racing CPU going up will be handled by kthreads_online_cpu()
+ */
+ kthread_fetch_affinity(kthread, affinity);
+ set_cpus_allowed_ptr(current, affinity);
+ mutex_unlock(&kthreads_hotplug_lock);
+
+ free_cpumask_var(affinity);
+ }
+}
+
static int kthread(void *_create)
{
static const struct sched_param param = { .sched_priority = 0 };
@@ -367,7 +439,6 @@ static int kthread(void *_create)
* back to default in case they have been changed.
*/
sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
- set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -381,6 +452,11 @@ static int kthread(void *_create)
schedule_preempt_disabled();
preempt_enable();
+ self->started = 1;
+
+ if (!(current->flags & PF_NO_SETAFFINITY) && !self->preferred_affinity)
+ kthread_affine_node();
+
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
cgroup_kthread_ready();
@@ -539,7 +615,9 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int
void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
{
+ struct kthread *kthread = to_kthread(p);
__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
+ WARN_ON_ONCE(kthread->started);
}
/**
@@ -553,7 +631,9 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
*/
void kthread_bind(struct task_struct *p, unsigned int cpu)
{
+ struct kthread *kthread = to_kthread(p);
__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
+ WARN_ON_ONCE(kthread->started);
}
EXPORT_SYMBOL(kthread_bind);
@@ -622,6 +702,8 @@ void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = to_kthread(k);
+ if (!test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))
+ return;
/*
* Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again.
@@ -735,10 +817,11 @@ EXPORT_SYMBOL(kthread_stop_put);
int kthreadd(void *unused)
{
+ static const char comm[TASK_COMM_LEN] = "kthreadd";
struct task_struct *tsk = current;
/* Setup a clean context for our children to inherit. */
- set_task_comm(tsk, "kthreadd");
+ set_task_comm(tsk, comm);
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
set_mems_allowed(node_states[N_MEMORY]);
@@ -771,6 +854,92 @@ int kthreadd(void *unused)
return 0;
}
+int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
+{
+ struct kthread *kthread = to_kthread(p);
+ cpumask_var_t affinity;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ WARN_ON_ONCE(kthread->preferred_affinity);
+
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+ return -ENOMEM;
+
+ kthread->preferred_affinity = kzalloc(sizeof(struct cpumask), GFP_KERNEL);
+ if (!kthread->preferred_affinity) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock(&kthreads_hotplug_lock);
+ cpumask_copy(kthread->preferred_affinity, mask);
+ WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
+ list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
+ kthread_fetch_affinity(kthread, affinity);
+
+ /* It's safe because the task is inactive. */
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
+ do_set_cpus_allowed(p, affinity);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+ mutex_unlock(&kthreads_hotplug_lock);
+out:
+ free_cpumask_var(affinity);
+
+ return ret;
+}
+
+/*
+ * Re-affine kthreads according to their preferences
+ * and the newly online CPU. The CPU down part is handled
+ * by select_fallback_rq() which default re-affines to
+ * housekeepers from other nodes in case the preferred
+ * affinity doesn't apply anymore.
+ */
+static int kthreads_online_cpu(unsigned int cpu)
+{
+ cpumask_var_t affinity;
+ struct kthread *k;
+ int ret;
+
+ guard(mutex)(&kthreads_hotplug_lock);
+
+ if (list_empty(&kthreads_hotplug))
+ return 0;
+
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = 0;
+
+ list_for_each_entry(k, &kthreads_hotplug, hotplug_node) {
+ if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
+ kthread_is_per_cpu(k->task))) {
+ ret = -EINVAL;
+ continue;
+ }
+ kthread_fetch_affinity(k, affinity);
+ set_cpus_allowed_ptr(k->task, affinity);
+ }
+
+ free_cpumask_var(affinity);
+
+ return ret;
+}
+
+static int kthreads_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_KTHREADS_ONLINE, "kthreads:online",
+ kthreads_online_cpu, NULL);
+}
+early_initcall(kthreads_init);
+
void __kthread_init_worker(struct kthread_worker *worker,
const char *name,
struct lock_class_key *key)
@@ -844,8 +1013,16 @@ repeat:
* event only cares about the address.
*/
trace_sched_kthread_work_execute_end(work, func);
- } else if (!freezing(current))
+ } else if (!freezing(current)) {
schedule();
+ } else {
+ /*
+ * Handle the case where the current remains
+ * TASK_INTERRUPTIBLE. try_to_freeze() expects
+ * the current to be TASK_RUNNING.
+ */
+ __set_current_state(TASK_RUNNING);
+ }
try_to_freeze();
cond_resched();
@@ -854,12 +1031,11 @@ repeat:
EXPORT_SYMBOL_GPL(kthread_worker_fn);
static __printf(3, 0) struct kthread_worker *
-__kthread_create_worker(int cpu, unsigned int flags,
- const char namefmt[], va_list args)
+__kthread_create_worker_on_node(unsigned int flags, int node,
+ const char namefmt[], va_list args)
{
struct kthread_worker *worker;
struct task_struct *task;
- int node = NUMA_NO_NODE;
worker = kzalloc(sizeof(*worker), GFP_KERNEL);
if (!worker)
@@ -867,20 +1043,14 @@ __kthread_create_worker(int cpu, unsigned int flags,
kthread_init_worker(worker);
- if (cpu >= 0)
- node = cpu_to_node(cpu);
-
task = __kthread_create_on_node(kthread_worker_fn, worker,
- node, namefmt, args);
+ node, namefmt, args);
if (IS_ERR(task))
goto fail_task;
- if (cpu >= 0)
- kthread_bind(task, cpu);
-
worker->flags = flags;
worker->task = task;
- wake_up_process(task);
+
return worker;
fail_task:
@@ -889,8 +1059,9 @@ fail_task:
}
/**
- * kthread_create_worker - create a kthread worker
+ * kthread_create_worker_on_node - create a kthread worker
* @flags: flags modifying the default behavior of the worker
+ * @node: task structure for the thread is allocated on this node
* @namefmt: printf-style name for the kthread worker (task).
*
* Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
@@ -898,25 +1069,26 @@ fail_task:
* when the caller was killed by a fatal signal.
*/
struct kthread_worker *
-kthread_create_worker(unsigned int flags, const char namefmt[], ...)
+kthread_create_worker_on_node(unsigned int flags, int node, const char namefmt[], ...)
{
struct kthread_worker *worker;
va_list args;
va_start(args, namefmt);
- worker = __kthread_create_worker(-1, flags, namefmt, args);
+ worker = __kthread_create_worker_on_node(flags, node, namefmt, args);
va_end(args);
return worker;
}
-EXPORT_SYMBOL(kthread_create_worker);
+EXPORT_SYMBOL(kthread_create_worker_on_node);
/**
* kthread_create_worker_on_cpu - create a kthread worker and bind it
* to a given CPU and the associated NUMA node.
* @cpu: CPU number
* @flags: flags modifying the default behavior of the worker
- * @namefmt: printf-style name for the kthread worker (task).
+ * @namefmt: printf-style name for the thread. Format is restricted
+ * to "name.*%u". Code fills in cpu number.
*
* Use a valid CPU number if you want to bind the kthread worker
* to the given CPU and the associated NUMA node.
@@ -948,14 +1120,13 @@ EXPORT_SYMBOL(kthread_create_worker);
*/
struct kthread_worker *
kthread_create_worker_on_cpu(int cpu, unsigned int flags,
- const char namefmt[], ...)
+ const char namefmt[])
{
struct kthread_worker *worker;
- va_list args;
- va_start(args, namefmt);
- worker = __kthread_create_worker(cpu, flags, namefmt, args);
- va_end(args);
+ worker = kthread_create_worker_on_node(flags, cpu_to_node(cpu), namefmt, cpu);
+ if (!IS_ERR(worker))
+ kthread_bind(worker->task, cpu);
return worker;
}
@@ -1004,7 +1175,7 @@ static void kthread_insert_work(struct kthread_worker *worker,
* @work: kthread_work to queue
*
* Queue @work to work processor @task for async execution. @task
- * must have been created with kthread_worker_create(). Returns %true
+ * must have been created with kthread_create_worker(). Returns %true
* if @work was successfully queued, %false if it was already pending.
*
* Reinitialize the work if it needs to be used by another worker.
@@ -1036,7 +1207,8 @@ EXPORT_SYMBOL_GPL(kthread_queue_work);
*/
void kthread_delayed_work_timer_fn(struct timer_list *t)
{
- struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
+ struct kthread_delayed_work *dwork = timer_container_of(dwork, t,
+ timer);
struct kthread_work *work = &dwork->work;
struct kthread_worker *worker = work->worker;
unsigned long flags;
@@ -1191,14 +1363,14 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
struct kthread_worker *worker = work->worker;
/*
- * del_timer_sync() must be called to make sure that the timer
+ * timer_delete_sync() must be called to make sure that the timer
* callback is not running. The lock must be temporary released
* to avoid a deadlock with the callback. In the meantime,
* any queuing is blocked by setting the canceling counter.
*/
work->canceling++;
raw_spin_unlock_irqrestore(&worker->lock, *flags);
- del_timer_sync(&dwork->timer);
+ timer_delete_sync(&dwork->timer);
raw_spin_lock_irqsave(&worker->lock, *flags);
work->canceling--;
}