summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-21 17:10:05 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-21 17:10:05 -0800
commit1d6d3992235ed08929846f98fecf79682e0b422c (patch)
tree497802350ee468b58abc351c2bd4aabeeaf88d58
parent96c84703f1cf6ea43617f9565166681cd71df104 (diff)
parentd8b4bf4ea04dd96fe43f6010c614149aba4c9b91 (diff)
Merge tag 'kthread-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks
Pull kthread updates from Frederic Weisbecker: "Kthreads affinity follow either of 4 existing different patterns: 1) Per-CPU kthreads must stay affine to a single CPU and never execute relevant code on any other CPU. This is currently handled by smpboot code which takes care of CPU-hotplug operations. Affinity here is a correctness constraint. 2) Some kthreads _have_ to be affine to a specific set of CPUs and can't run anywhere else. The affinity is set through kthread_bind_mask() and the subsystem takes care by itself to handle CPU-hotplug operations. Affinity here is assumed to be a correctness constraint. 3) Per-node kthreads _prefer_ to be affine to a specific NUMA node. This is not a correctness constraint but merely a preference in terms of memory locality. kswapd and kcompactd both fall into this category. The affinity is set manually like for any other task and CPU-hotplug is supposed to be handled by the relevant subsystem so that the task is properly reaffined whenever a given CPU from the node comes up. Also care should be taken so that the node affinity doesn't cross isolated (nohz_full) cpumask boundaries. 4) Similar to the previous point except kthreads have a _preferred_ affinity different than a node. Both RCU boost kthreads and RCU exp kworkers fall into this category as they refer to "RCU nodes" from a distinctly distributed tree. Currently the preferred affinity patterns (3 and 4) have at least 4 identified users, with more or less success when it comes to handle CPU-hotplug operations and CPU isolation. Each of which do it in its own ad-hoc way. This is an infrastructure proposal to handle this with the following API changes: - kthread_create_on_node() automatically affines the created kthread to its target node unless it has been set as per-cpu or bound with kthread_bind[_mask]() before the first wake-up. - kthread_affine_preferred() is a new function that can be called right after kthread_create_on_node() to specify a preferred affinity different than the specified node. When the preferred affinity can't be applied because the possible targets are offline or isolated (nohz_full), the kthread is affine to the housekeeping CPUs (which means to all online CPUs most of the time or only the non-nohz_full CPUs when nohz_full= is set). kswapd, kcompactd, RCU boost kthreads and RCU exp kworkers have been converted, along with a few old drivers. Summary of the changes: - Consolidate a bunch of ad-hoc implementations of kthread_run_on_cpu() - Introduce task_cpu_fallback_mask() that defines the default last resort affinity of a task to become nohz_full aware - Add some correctness check to ensure kthread_bind() is always called before the first kthread wake up. - Default affine kthread to its preferred node. - Convert kswapd / kcompactd and remove their halfway working ad-hoc affinity implementation - Implement kthreads preferred affinity - Unify kthread worker and kthread API's style - Convert RCU kthreads to the new API and remove the ad-hoc affinity implementation" * tag 'kthread-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks: kthread: modify kernel-doc function name to match code rcu: Use kthread preferred affinity for RCU exp kworkers treewide: Introduce kthread_run_worker[_on_cpu]() kthread: Unify kthread_create_on_cpu() and kthread_create_worker_on_cpu() automatic format rcu: Use kthread preferred affinity for RCU boost kthread: Implement preferred affinity mm: Create/affine kswapd to its preferred node mm: Create/affine kcompactd to its preferred node kthread: Default affine kthread to its preferred NUMA node kthread: Make sure kthread hasn't started while binding it sched,arm64: Handle CPU isolation on last resort fallback rq selection arm64: Exclude nohz_full CPUs from 32bits el0 support lib: test_objpool: Use kthread_run_on_cpu() kallsyms: Use kthread_run_on_cpu() soc/qman: test: Use kthread_run_on_cpu() arm/bL_switcher: Use kthread_run_on_cpu()
-rw-r--r--Documentation/arch/arm64/asymmetric-32bit.rst8
-rw-r--r--arch/arm/common/bL_switcher.c10
-rw-r--r--arch/arm64/include/asm/cpufeature.h1
-rw-r--r--arch/arm64/include/asm/mmu_context.h14
-rw-r--r--arch/arm64/kernel/cpufeature.c15
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--crypto/crypto_engine.c2
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c2
-rw-r--r--drivers/gpu/drm/drm_vblank_work.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_slpc.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c8
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot.c2
-rw-r--r--drivers/gpu/drm/msm/msm_atomic.c2
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/msm_kms.c2
-rw-r--r--drivers/media/platform/chips-media/wave5/wave5-vpu.c2
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dpll.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_gnss.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c2
-rw-r--r--drivers/platform/chrome/cros_ec_spi.c2
-rw-r--r--drivers/ptp/ptp_clock.c2
-rw-r--r--drivers/soc/fsl/qbman/qman_test_stash.c6
-rw-r--r--drivers/spi/spi.c2
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c2
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.c2
-rw-r--r--drivers/watchdog/watchdog_dev.c2
-rw-r--r--fs/erofs/zdata.c2
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/kthread.h56
-rw-r--r--include/linux/mmu_context.h1
-rw-r--r--kernel/kallsyms_selftest.c4
-rw-r--r--kernel/kthread.c203
-rw-r--r--kernel/rcu/tree.c94
-rw-r--r--kernel/rcu/tree_plugin.h11
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/ext.c2
-rw-r--r--kernel/workqueue.c2
-rw-r--r--lib/test_objpool.c19
-rw-r--r--mm/compaction.c43
-rw-r--r--mm/vmscan.c8
-rw-r--r--net/dsa/tag_ksz.c2
-rw-r--r--net/dsa/tag_ocelot_8021q.c2
-rw-r--r--net/dsa/tag_sja1105.c2
47 files changed, 341 insertions, 221 deletions
diff --git a/Documentation/arch/arm64/asymmetric-32bit.rst b/Documentation/arch/arm64/asymmetric-32bit.rst
index 64a0b505da7d..1ca2b359a907 100644
--- a/Documentation/arch/arm64/asymmetric-32bit.rst
+++ b/Documentation/arch/arm64/asymmetric-32bit.rst
@@ -153,3 +153,11 @@ asymmetric system, a broken guest at EL1 could still attempt to execute
mode will return to host userspace with an ``exit_reason`` of
``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
+
+NOHZ FULL
+---------
+
+To avoid perturbing an adaptive-ticks CPU (specified using
+``nohz_full=``) when a 32-bit task is forcefully migrated, these CPUs
+are treated as 64-bit-only when support for asymmetric 32-bit systems
+is enabled.
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 9a9aa53547a6..d1e82a318e3b 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -307,13 +307,11 @@ static struct task_struct *bL_switcher_thread_create(int cpu, void *arg)
{
struct task_struct *task;
- task = kthread_create_on_node(bL_switcher_thread, arg,
- cpu_to_node(cpu), "kswitcher_%d", cpu);
- if (!IS_ERR(task)) {
- kthread_bind(task, cpu);
- wake_up_process(task);
- } else
+ task = kthread_run_on_cpu(bL_switcher_thread, arg,
+ cpu, "kswitcher_%d");
+ if (IS_ERR(task))
pr_err("%s failed for CPU %d\n", __func__, cpu);
+
return task;
}
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index a4d0b77a68d9..e0e4478f5fb5 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -671,6 +671,7 @@ static inline bool supports_clearbhb(int scope)
}
const struct cpumask *system_32bit_el0_cpumask(void);
+const struct cpumask *fallback_32bit_el0_cpumask(void);
DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
static inline bool system_supports_32bit_el0(void)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 48b3d9553b67..0dbe3b29049b 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -271,18 +271,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
}
static inline const struct cpumask *
-task_cpu_possible_mask(struct task_struct *p)
+__task_cpu_possible_mask(struct task_struct *p, const struct cpumask *mask)
{
if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
- return cpu_possible_mask;
+ return mask;
if (!is_compat_thread(task_thread_info(p)))
- return cpu_possible_mask;
+ return mask;
return system_32bit_el0_cpumask();
}
+
+static inline const struct cpumask *
+task_cpu_possible_mask(struct task_struct *p)
+{
+ return __task_cpu_possible_mask(p, cpu_possible_mask);
+}
#define task_cpu_possible_mask task_cpu_possible_mask
+const struct cpumask *task_cpu_fallback_mask(struct task_struct *p);
+
void verify_cpu_asid_bits(void);
void post_ttbr_update_workaround(void);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d41128e37701..4eb7c6698ae4 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -75,6 +75,7 @@
#include <linux/cpu.h>
#include <linux/kasan.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
@@ -1644,6 +1645,11 @@ const struct cpumask *system_32bit_el0_cpumask(void)
return cpu_possible_mask;
}
+const struct cpumask *task_cpu_fallback_mask(struct task_struct *p)
+{
+ return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_TICK));
+}
+
static int __init parse_32bit_el0_param(char *str)
{
allow_mismatched_32bit_el0 = true;
@@ -3773,7 +3779,14 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
static int lucky_winner = -1;
struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
- bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
+ bool cpu_32bit = false;
+
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ if (!housekeeping_cpu(cpu, HK_TYPE_TICK))
+ pr_info("Treating adaptive-ticks CPU %u as 64-bit only\n", cpu);
+ else
+ cpu_32bit = true;
+ }
if (cpu_32bit) {
cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index cd57a517d04a..d7ab8780ab9e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -681,7 +681,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
pid_nr = pid_vnr(pid);
put_pid(pid);
- pit->worker = kthread_create_worker(0, "kvm-pit/%d", pid_nr);
+ pit->worker = kthread_run_worker(0, "kvm-pit/%d", pid_nr);
if (IS_ERR(pit->worker))
goto fail_kthread;
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index e60a0eb628e8..c7c16da5e649 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -517,7 +517,7 @@ struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev,
crypto_init_queue(&engine->queue, qlen);
spin_lock_init(&engine->queue_lock);
- engine->kworker = kthread_create_worker(0, "%s", engine->name);
+ engine->kworker = kthread_run_worker(0, "%s", engine->name);
if (IS_ERR(engine->kworker)) {
dev_err(dev, "failed to create crypto request pump task\n");
return NULL;
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bd8f75accfa0..2486a6c5256a 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -225,7 +225,7 @@ static void __init cppc_freq_invariance_init(void)
if (fie_disabled)
return;
- kworker_fie = kthread_create_worker(0, "cppc_fie");
+ kworker_fie = kthread_run_worker(0, "cppc_fie");
if (IS_ERR(kworker_fie)) {
pr_warn("%s: failed to create kworker_fie: %ld\n", __func__,
PTR_ERR(kworker_fie));
diff --git a/drivers/gpu/drm/drm_vblank_work.c b/drivers/gpu/drm/drm_vblank_work.c
index 1752ffb44e1d..9cc71120246f 100644
--- a/drivers/gpu/drm/drm_vblank_work.c
+++ b/drivers/gpu/drm/drm_vblank_work.c
@@ -277,7 +277,7 @@ int drm_vblank_worker_init(struct drm_vblank_crtc *vblank)
INIT_LIST_HEAD(&vblank->pending_work);
init_waitqueue_head(&vblank->work_wait_queue);
- worker = kthread_create_worker(0, "card%d-crtc%d",
+ worker = kthread_run_worker(0, "card%d-crtc%d",
vblank->dev->primary->index,
vblank->pipe);
if (IS_ERR(worker))
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 89d4dc8b60c6..eb0158e43417 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -369,7 +369,7 @@ static int live_parallel_switch(void *arg)
if (!data[n].ce[0])
continue;
- worker = kthread_create_worker(0, "igt/parallel:%s",
+ worker = kthread_run_worker(0, "igt/parallel:%s",
data[n].ce[0]->engine->name);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 222ca7c44951..81c31396eceb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -3574,7 +3574,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
arg[id].batch = NULL;
arg[id].count = 0;
- worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
+ worker[id] = kthread_run_worker(0, "igt/smoke:%d", id);
if (IS_ERR(worker[id])) {
err = PTR_ERR(worker[id]);
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 9ce8ff1c04fe..9d3aeb237295 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1025,7 +1025,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].engine = other;
threads[tmp].flags = flags;
- worker = kthread_create_worker(0, "igt/%s",
+ worker = kthread_run_worker(0, "igt/%s",
other->name);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 4ecc4ae74a54..e218b229681f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -489,7 +489,7 @@ static int live_slpc_tile_interaction(void *arg)
return -ENOMEM;
for_each_gt(gt, i915, i) {
- threads[i].worker = kthread_create_worker(0, "igt/slpc_parallel:%d", gt->info.id);
+ threads[i].worker = kthread_run_worker(0, "igt/slpc_parallel:%d", gt->info.id);
if (IS_ERR(threads[i].worker)) {
ret = PTR_ERR(threads[i].worker);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index acae30a04a94..88870844b5bd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -492,7 +492,7 @@ static int mock_breadcrumbs_smoketest(void *arg)
for (n = 0; n < ncpus; n++) {
struct kthread_worker *worker;
- worker = kthread_create_worker(0, "igt/%d", n);
+ worker = kthread_run_worker(0, "igt/%d", n);
if (IS_ERR(worker)) {
ret = PTR_ERR(worker);
ncpus = n;
@@ -1645,7 +1645,7 @@ static int live_parallel_engines(void *arg)
for_each_uabi_engine(engine, i915) {
struct kthread_worker *worker;
- worker = kthread_create_worker(0, "igt/parallel:%s",
+ worker = kthread_run_worker(0, "igt/parallel:%s",
engine->name);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
@@ -1806,7 +1806,7 @@ static int live_breadcrumbs_smoketest(void *arg)
unsigned int i = idx * ncpus + n;
struct kthread_worker *worker;
- worker = kthread_create_worker(0, "igt/%d.%d", idx, n);
+ worker = kthread_run_worker(0, "igt/%d.%d", idx, n);
if (IS_ERR(worker)) {
ret = PTR_ERR(worker);
goto out_flush;
@@ -3219,7 +3219,7 @@ static int perf_parallel_engines(void *arg)
memset(&engines[idx].p, 0, sizeof(engines[idx].p));
- worker = kthread_create_worker(0, "igt:%s",
+ worker = kthread_run_worker(0, "igt:%s",
engine->name);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
index e75b97127c0d..2be00b11e557 100644
--- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
+++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
@@ -109,7 +109,7 @@ int msm_disp_snapshot_init(struct drm_device *drm_dev)
mutex_init(&kms->dump_mutex);
- kms->dump_worker = kthread_create_worker(0, "%s", "disp_snapshot");
+ kms->dump_worker = kthread_run_worker(0, "%s", "disp_snapshot");
if (IS_ERR(kms->dump_worker))
DRM_ERROR("failed to create disp state task\n");
diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index 9c45d641b521..a7a2384044ff 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -115,7 +115,7 @@ int msm_atomic_init_pending_timer(struct msm_pending_timer *timer,
timer->kms = kms;
timer->crtc_idx = crtc_idx;
- timer->worker = kthread_create_worker(0, "atomic-worker-%d", crtc_idx);
+ timer->worker = kthread_run_worker(0, "atomic-worker-%d", crtc_idx);
if (IS_ERR(timer->worker)) {
int ret = PTR_ERR(timer->worker);
timer->worker = NULL;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 0d4a3744cfcb..8557998e0c92 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -859,7 +859,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
gpu->funcs = funcs;
gpu->name = name;
- gpu->worker = kthread_create_worker(0, "gpu-worker");
+ gpu->worker = kthread_run_worker(0, "gpu-worker");
if (IS_ERR(gpu->worker)) {
ret = PTR_ERR(gpu->worker);
gpu->worker = NULL;
diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
index 4cfad12f4dc1..38965e12a6bf 100644
--- a/drivers/gpu/drm/msm/msm_kms.c
+++ b/drivers/gpu/drm/msm/msm_kms.c
@@ -268,7 +268,7 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv)
/* initialize event thread */
ev_thread = &priv->event_thread[drm_crtc_index(crtc)];
ev_thread->dev = ddev;
- ev_thread->worker = kthread_create_worker(0, "crtc_event:%d", crtc->base.id);
+ ev_thread->worker = kthread_run_worker(0, "crtc_event:%d", crtc->base.id);
if (IS_ERR(ev_thread->worker)) {
ret = PTR_ERR(ev_thread->worker);
DRM_DEV_ERROR(dev, "failed to create crtc_event kthread\n");
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu.c b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
index 6b294a2d6717..d1320298a0f7 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
@@ -271,7 +271,7 @@ static int wave5_vpu_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "failed to get irq resource, falling back to polling\n");
hrtimer_init(&dev->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
dev->hrtimer.function = &wave5_vpu_timer_callback;
- dev->worker = kthread_create_worker(0, "vpu_irq_thread");
+ dev->worker = kthread_run_worker(0, "vpu_irq_thread");
if (IS_ERR(dev->worker)) {
dev_err(&pdev->dev, "failed to create vpu irq worker\n");
ret = PTR_ERR(dev->worker);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 3a792f79270d..377e66cf7a48 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -394,7 +394,7 @@ static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
kthread_init_delayed_work(&chip->irq_poll_work,
mv88e6xxx_irq_poll);
- chip->kworker = kthread_create_worker(0, "%s", dev_name(chip->dev));
+ chip->kworker = kthread_run_worker(0, "%s", dev_name(chip->dev));
if (IS_ERR(chip->kworker))
return PTR_ERR(chip->kworker);
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
index 38e151c7ea23..8d806d8ad761 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -2053,7 +2053,7 @@ static int ice_dpll_init_worker(struct ice_pf *pf)
struct kthread_worker *kworker;
kthread_init_delayed_work(&d->work, ice_dpll_periodic_work);
- kworker = kthread_create_worker(0, "ice-dplls-%s",
+ kworker = kthread_run_worker(0, "ice-dplls-%s",
dev_name(ice_pf_to_dev(pf)));
if (IS_ERR(kworker))
return PTR_ERR(kworker);
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c
index f02e8ca55375..b2148dbe49b2 100644
--- a/drivers/net/ethernet/intel/ice/ice_gnss.c
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.c
@@ -182,7 +182,7 @@ static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf)
pf->gnss_serial = gnss;
kthread_init_delayed_work(&gnss->read_work, ice_gnss_read);
- kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev));
+ kworker = kthread_run_worker(0, "ice-gnss-%s", dev_name(dev));
if (IS_ERR(kworker)) {
kfree(gnss);
return NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index efd770dfec44..12b4eef84b77 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -3072,7 +3072,7 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp)
/* Allocate a kworker for handling work required for the ports
* connected to the PTP hardware clock.
*/
- kworker = kthread_create_worker(0, "ice-ptp-%s",
+ kworker = kthread_run_worker(0, "ice-ptp-%s",
dev_name(ice_pf_to_dev(pf)));
if (IS_ERR(kworker))
return PTR_ERR(kworker);
diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c
index 86a3d32a7763..08f566cc1480 100644
--- a/drivers/platform/chrome/cros_ec_spi.c
+++ b/drivers/platform/chrome/cros_ec_spi.c
@@ -715,7 +715,7 @@ static int cros_ec_spi_devm_high_pri_alloc(struct device *dev,
int err;
ec_spi->high_pri_worker =
- kthread_create_worker(0, "cros_ec_spi_high_pri");
+ kthread_run_worker(0, "cros_ec_spi_high_pri");
if (IS_ERR(ec_spi->high_pri_worker)) {
err = PTR_ERR(ec_spi->high_pri_worker);
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 77a36e7bddd5..b932425ddc6a 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -296,7 +296,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
if (ptp->info->do_aux_work) {
kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
- ptp->kworker = kthread_create_worker(0, "ptp%d", ptp->index);
+ ptp->kworker = kthread_run_worker(0, "ptp%d", ptp->index);
if (IS_ERR(ptp->kworker)) {
err = PTR_ERR(ptp->kworker);
pr_err("failed to create ptp aux_worker %d\n", err);
diff --git a/drivers/soc/fsl/qbman/qman_test_stash.c b/drivers/soc/fsl/qbman/qman_test_stash.c
index b7e8e5ec884c..f4d3c2146f4f 100644
--- a/drivers/soc/fsl/qbman/qman_test_stash.c
+++ b/drivers/soc/fsl/qbman/qman_test_stash.c
@@ -108,14 +108,12 @@ static int on_all_cpus(int (*fn)(void))
.fn = fn,
.started = ATOMIC_INIT(0)
};
- struct task_struct *k = kthread_create(bstrap_fn, &bstrap,
- "hotpotato%d", cpu);
+ struct task_struct *k = kthread_run_on_cpu(bstrap_fn, &bstrap,
+ cpu, "hotpotato%d");
int ret;
if (IS_ERR(k))
return -ENOMEM;
- kthread_bind(k, cpu);
- wake_up_process(k);
/*
* If we call kthread_stop() before the "wake up" has had an
* effect, then the thread may exit with -EINTR without ever
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ff1add2ecb91..e4aa8f838934 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2060,7 +2060,7 @@ static int spi_init_queue(struct spi_controller *ctlr)
ctlr->busy = false;
ctlr->queue_empty = true;
- ctlr->kworker = kthread_create_worker(0, dev_name(&ctlr->dev));
+ ctlr->kworker = kthread_run_worker(0, dev_name(&ctlr->dev));
if (IS_ERR(ctlr->kworker)) {
dev_err(&ctlr->dev, "failed to create message pump kworker\n");
return PTR_ERR(ctlr->kworker);
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 6021eeb903fe..95c0c63119ac 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -7635,7 +7635,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
mutex_init(&port->lock);
mutex_init(&port->swap_lock);
- port->wq = kthread_create_worker(0, dev_name(dev));
+ port->wq = kthread_run_worker(0, dev_name(dev));
if (IS_ERR(port->wq))
return ERR_CAST(port->wq);
sched_set_fifo(port->wq->task);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 8ffea8430f95..c204fc8e471a 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -229,7 +229,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
dev = &vdpasim->vdpa.dev;
kthread_init_work(&vdpasim->work, vdpasim_work_fn);
- vdpasim->worker = kthread_create_worker(0, "vDPA sim worker: %s",
+ vdpasim->worker = kthread_run_worker(0, "vDPA sim worker: %s",
dev_attr->name);
if (IS_ERR(vdpasim->worker))
goto err_iommu;
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 4190cb800cc4..19698d87dc57 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -1229,7 +1229,7 @@ int __init watchdog_dev_init(void)
{
int err;
- watchdog_kworker = kthread_create_worker(0, "watchdogd");
+ watchdog_kworker = kthread_run_worker(0, "watchdogd");
if (IS_ERR(watchdog_kworker)) {
pr_err("Failed to create watchdog kworker\n");
return PTR_ERR(watchdog_kworker);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 254f6ad2c336..33a128252687 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -320,7 +320,7 @@ static void erofs_destroy_percpu_workers(void)
static struct kthread_worker *erofs_init_percpu_worker(int cpu)
{
struct kthread_worker *worker =
- kthread_create_worker_on_cpu(cpu, 0, "erofs_worker/%u", cpu);
+ kthread_run_worker_on_cpu(cpu, 0, "erofs_worker/%u");
if (IS_ERR(worker))
return worker;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index a04b73c40173..6cc5e484547c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -240,6 +240,7 @@ enum cpuhp_state {
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RANDOM_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
+ CPUHP_AP_KTHREADS_ONLINE,
CPUHP_AP_BASE_CACHEINFO_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40,
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index b11f53c1ba2e..8d27403888ce 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -85,6 +85,7 @@ kthread_run_on_cpu(int (*threadfn)(void *data), void *data,
void free_kthread_struct(struct task_struct *k);
void kthread_bind(struct task_struct *k, unsigned int cpu);
void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
+int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask);
int kthread_stop(struct task_struct *k);
int kthread_stop_put(struct task_struct *k);
bool kthread_should_stop(void);
@@ -186,13 +187,58 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
int kthread_worker_fn(void *worker_ptr);
-__printf(2, 3)
-struct kthread_worker *
-kthread_create_worker(unsigned int flags, const char namefmt[], ...);
+__printf(3, 4)
+struct kthread_worker *kthread_create_worker_on_node(unsigned int flags,
+ int node,
+ const char namefmt[], ...);
+
+#define kthread_create_worker(flags, namefmt, ...) \
+ kthread_create_worker_on_node(flags, NUMA_NO_NODE, namefmt, ## __VA_ARGS__);
+
+/**
+ * kthread_run_worker - create and wake a kthread worker.
+ * @flags: flags modifying the default behavior of the worker
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: Convenient wrapper for kthread_create_worker() followed by
+ * wake_up_process(). Returns the kthread_worker or ERR_PTR(-ENOMEM).
+ */
+#define kthread_run_worker(flags, namefmt, ...) \
+({ \
+ struct kthread_worker *__kw \
+ = kthread_create_worker(flags, namefmt, ## __VA_ARGS__); \
+ if (!IS_ERR(__kw)) \
+ wake_up_process(__kw->task); \
+ __kw; \
+})
-__printf(3, 4) struct kthread_worker *
+struct kthread_worker *
kthread_create_worker_on_cpu(int cpu, unsigned int flags,
- const char namefmt[], ...);
+ const char namefmt[]);
+
+/**
+ * kthread_run_worker_on_cpu - create and wake a cpu bound kthread worker.
+ * @cpu: CPU number
+ * @flags: flags modifying the default behavior of the worker
+ * @namefmt: printf-style name for the thread. Format is restricted
+ * to "name.*%u". Code fills in cpu number.
+ *
+ * Description: Convenient wrapper for kthread_create_worker_on_cpu()
+ * followed by wake_up_process(). Returns the kthread_worker or
+ * ERR_PTR(-ENOMEM).
+ */
+static inline struct kthread_worker *
+kthread_run_worker_on_cpu(int cpu, unsigned int flags,
+ const char namefmt[])
+{
+ struct kthread_worker *kw;
+
+ kw = kthread_create_worker_on_cpu(cpu, flags, namefmt);
+ if (!IS_ERR(kw))
+ wake_up_process(kw->task);
+
+ return kw;
+}
bool kthread_queue_work(struct kthread_worker *worker,
struct kthread_work *work);
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index bbaec80c78c5..ac01dc4eb2ce 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -24,6 +24,7 @@ static inline void leave_mm(void) { }
#ifndef task_cpu_possible_mask
# define task_cpu_possible_mask(p) cpu_possible_mask
# define task_cpu_possible(cpu, p) true
+# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_TICK)
#else
# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
#endif
diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c
index 873f7c445488..cf4af5728307 100644
--- a/kernel/kallsyms_selftest.c
+++ b/kernel/kallsyms_selftest.c
@@ -435,13 +435,11 @@ static int __init kallsyms_test_init(void)
{
struct task_struct *t;
- t = kthread_create(test_entry, NULL, "kallsyms_test");
+ t = kthread_run_on_cpu(test_entry, NULL, 0, "kallsyms_test");
if (IS_ERR(t)) {
pr_info("Create kallsyms selftest task failed\n");
return PTR_ERR(t);
}
- kthread_bind(t, 0);
- wake_up_process(t);
return 0;
}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1eb6f62a9165..6a034c76b6e9 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -35,6 +35,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
+static LIST_HEAD(kthreads_hotplug);
+static DEFINE_MUTEX(kthreads_hotplug_lock);
+
struct kthread_create_info
{
/* Information passed to kthread() from kthreadd. */
@@ -53,6 +56,8 @@ struct kthread_create_info
struct kthread {
unsigned long flags;
unsigned int cpu;
+ unsigned int node;
+ int started;
int result;
int (*threadfn)(void *);
void *data;
@@ -63,6 +68,9 @@ struct kthread {
#endif
/* To store the full name if task comm is truncated. */
char *full_name;
+ struct task_struct *task;
+ struct list_head hotplug_node;
+ struct cpumask *preferred_affinity;
};
enum KTHREAD_BITS {
@@ -121,8 +129,11 @@ bool set_kthread_struct(struct task_struct *p)
init_completion(&kthread->exited);
init_completion(&kthread->parked);
+ INIT_LIST_HEAD(&kthread->hotplug_node);
p->vfork_done = &kthread->exited;
+ kthread->task = p;
+ kthread->node = tsk_fork_get_node(current);
p->worker_private = kthread;
return true;
}
@@ -313,6 +324,16 @@ void __noreturn kthread_exit(long result)
{
struct kthread *kthread = to_kthread(current);
kthread->result = result;
+ if (!list_empty(&kthread->hotplug_node)) {
+ mutex_lock(&kthreads_hotplug_lock);
+ list_del(&kthread->hotplug_node);
+ mutex_unlock(&kthreads_hotplug_lock);
+
+ if (kthread->preferred_affinity) {
+ kfree(kthread->preferred_affinity);
+ kthread->preferred_affinity = NULL;
+ }
+ }
do_exit(0);
}
EXPORT_SYMBOL(kthread_exit);
@@ -338,6 +359,56 @@ void __noreturn kthread_complete_and_exit(struct completion *comp, long code)
}
EXPORT_SYMBOL(kthread_complete_and_exit);
+static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpumask)
+{
+ const struct cpumask *pref;
+
+ if (kthread->preferred_affinity) {
+ pref = kthread->preferred_affinity;
+ } else {
+ if (WARN_ON_ONCE(kthread->node == NUMA_NO_NODE))
+ return;
+ pref = cpumask_of_node(kthread->node);
+ }
+
+ cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_KTHREAD));
+ if (cpumask_empty(cpumask))
+ cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_KTHREAD));
+}
+
+static void kthread_affine_node(void)
+{
+ struct kthread *kthread = to_kthread(current);
+ cpumask_var_t affinity;
+
+ WARN_ON_ONCE(kthread_is_per_cpu(current));
+
+ if (kthread->node == NUMA_NO_NODE) {
+ housekeeping_affine(current, HK_TYPE_KTHREAD);
+ } else {
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ mutex_lock(&kthreads_hotplug_lock);
+ WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
+ list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
+ /*
+ * The node cpumask is racy when read from kthread() but:
+ * - a racing CPU going down will either fail on the subsequent
+ * call to set_cpus_allowed_ptr() or be migrated to housekeepers
+ * afterwards by the scheduler.
+ * - a racing CPU going up will be handled by kthreads_online_cpu()
+ */
+ kthread_fetch_affinity(kthread, affinity);
+ set_cpus_allowed_ptr(current, affinity);
+ mutex_unlock(&kthreads_hotplug_lock);
+
+ free_cpumask_var(affinity);
+ }
+}
+
static int kthread(void *_create)
{
static const struct sched_param param = { .sched_priority = 0 };
@@ -368,7 +439,6 @@ static int kthread(void *_create)
* back to default in case they have been changed.
*/
sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
- set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -382,6 +452,11 @@ static int kthread(void *_create)
schedule_preempt_disabled();
preempt_enable();
+ self->started = 1;
+
+ if (!(current->flags & PF_NO_SETAFFINITY) && !self->preferred_affinity)
+ kthread_affine_node();
+
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
cgroup_kthread_ready();
@@ -540,7 +615,9 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int
void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
{
+ struct kthread *kthread = to_kthread(p);
__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
+ WARN_ON_ONCE(kthread->started);
}
/**
@@ -554,7 +631,9 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
*/
void kthread_bind(struct task_struct *p, unsigned int cpu)
{
+ struct kthread *kthread = to_kthread(p);
__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
+ WARN_ON_ONCE(kthread->started);
}
EXPORT_SYMBOL(kthread_bind);
@@ -775,6 +854,92 @@ int kthreadd(void *unused)
return 0;
}
+int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
+{
+ struct kthread *kthread = to_kthread(p);
+ cpumask_var_t affinity;
+ unsigned long flags;
+ int ret;
+
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ WARN_ON_ONCE(kthread->preferred_affinity);
+
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+ return -ENOMEM;
+
+ kthread->preferred_affinity = kzalloc(sizeof(struct cpumask), GFP_KERNEL);
+ if (!kthread->preferred_affinity) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock(&kthreads_hotplug_lock);
+ cpumask_copy(kthread->preferred_affinity, mask);
+ WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
+ list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
+ kthread_fetch_affinity(kthread, affinity);
+
+ /* It's safe because the task is inactive. */
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
+ do_set_cpus_allowed(p, affinity);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+ mutex_unlock(&kthreads_hotplug_lock);
+out:
+ free_cpumask_var(affinity);
+
+ return 0;
+}
+
+/*
+ * Re-affine kthreads according to their preferences
+ * and the newly online CPU. The CPU down part is handled
+ * by select_fallback_rq() which default re-affines to
+ * housekeepers from other nodes in case the preferred
+ * affinity doesn't apply anymore.
+ */
+static int kthreads_online_cpu(unsigned int cpu)
+{
+ cpumask_var_t affinity;
+ struct kthread *k;
+ int ret;
+
+ guard(mutex)(&kthreads_hotplug_lock);
+
+ if (list_empty(&kthreads_hotplug))
+ return 0;
+
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = 0;
+
+ list_for_each_entry(k, &kthreads_hotplug, hotplug_node) {
+ if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
+ kthread_is_per_cpu(k->task))) {
+ ret = -EINVAL;
+ continue;
+ }
+ kthread_fetch_affinity(k, affinity);
+ set_cpus_allowed_ptr(k->task, affinity);
+ }
+
+ free_cpumask_var(affinity);
+
+ return ret;
+}
+
+static int kthreads_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_KTHREADS_ONLINE, "kthreads:online",
+ kthreads_online_cpu, NULL);
+}
+early_initcall(kthreads_init);
+
void __kthread_init_worker(struct kthread_worker *worker,
const char *name,
struct lock_class_key *key)
@@ -866,12 +1031,11 @@ repeat:
EXPORT_SYMBOL_GPL(kthread_worker_fn);
static __printf(3, 0) struct kthread_worker *
-__kthread_create_worker(int cpu, unsigned int flags,
- const char namefmt[], va_list args)
+__kthread_create_worker_on_node(unsigned int flags, int node,
+ const char namefmt[], va_list args)
{
struct kthread_worker *worker;
struct task_struct *task;
- int node = NUMA_NO_NODE;
worker = kzalloc(sizeof(*worker), GFP_KERNEL);
if (!worker)
@@ -879,20 +1043,14 @@ __kthread_create_worker(int cpu, unsigned int flags,
kthread_init_worker(worker);
- if (cpu >= 0)
- node = cpu_to_node(cpu);
-
task = __kthread_create_on_node(kthread_worker_fn, worker,
- node, namefmt, args);
+ node, namefmt, args);
if (IS_ERR(task))
goto fail_task;
- if (cpu >= 0)
- kthread_bind(task, cpu);
-
worker->flags = flags;
worker->task = task;
- wake_up_process(task);
+
return worker;
fail_task:
@@ -901,8 +1059,9 @@ fail_task:
}
/**
- * kthread_create_worker - create a kthread worker
+ * kthread_create_worker_on_node - create a kthread worker
* @flags: flags modifying the default behavior of the worker
+ * @node: task structure for the thread is allocated on this node
* @namefmt: printf-style name for the kthread worker (task).
*
* Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
@@ -910,25 +1069,26 @@ fail_task:
* when the caller was killed by a fatal signal.
*/
struct kthread_worker *
-kthread_create_worker(unsigned int flags, const char namefmt[], ...)
+kthread_create_worker_on_node(unsigned int flags, int node, const char namefmt[], ...)
{
struct kthread_worker *worker;
va_list args;
va_start(args, namefmt);
- worker = __kthread_create_worker(-1, flags, namefmt, args);
+ worker = __kthread_create_worker_on_node(flags, node, namefmt, args);
va_end(args);
return worker;
}
-EXPORT_SYMBOL(kthread_create_worker);
+EXPORT_SYMBOL(kthread_create_worker_on_node);
/**
* kthread_create_worker_on_cpu - create a kthread worker and bind it
* to a given CPU and the associated NUMA node.
* @cpu: CPU number
* @flags: flags modifying the default behavior of the worker
- * @namefmt: printf-style name for the kthread worker (task).
+ * @namefmt: printf-style name for the thread. Format is restricted
+ * to "name.*%u". Code fills in cpu number.
*
* Use a valid CPU number if you want to bind the kthread worker
* to the given CPU and the associated NUMA node.
@@ -960,14 +1120,13 @@ EXPORT_SYMBOL(kthread_create_worker);
*/
struct kthread_worker *
kthread_create_worker_on_cpu(int cpu, unsigned int flags,
- const char namefmt[], ...)
+ const char namefmt[])
{
struct kthread_worker *worker;
- va_list args;
- va_start(args, namefmt);
- worker = __kthread_create_worker(cpu, flags, namefmt, args);
- va_end(args);
+ worker = kthread_create_worker_on_node(flags, cpu_to_node(cpu), namefmt, cpu);
+ if (!IS_ERR(worker))
+ kthread_bind(worker->task, cpu);
return worker;
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f81df3923f37..2795d6b5109c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -149,7 +149,6 @@ static int rcu_scheduler_fully_active __read_mostly;
static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
unsigned long gps, unsigned long flags);
-static struct task_struct *rcu_boost_task(struct rcu_node *rnp);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
@@ -4072,6 +4071,22 @@ rcu_boot_init_percpu_data(int cpu)
rcu_boot_init_nocb_percpu_data(rdp);
}
+static void rcu_thread_affine_rnp(struct task_struct *t, struct rcu_node *rnp)
+{
+ cpumask_var_t affinity;
+ int cpu;
+
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+ return;
+
+ for_each_leaf_node_possible_cpu(rnp, cpu)
+ cpumask_set_cpu(cpu, affinity);
+
+ kthread_affine_preferred(t, affinity);
+
+ free_cpumask_var(affinity);
+}
+
struct kthread_worker *rcu_exp_gp_kworker;
static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
@@ -4094,16 +4109,9 @@ static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD))
sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
-}
-static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp)
-{
- struct kthread_worker *kworker = READ_ONCE(rnp->exp_kworker);
-
- if (!kworker)
- return NULL;
-
- return kworker->task;
+ rcu_thread_affine_rnp(kworker->task, rnp);
+ wake_up_process(kworker->task);
}
static void __init rcu_start_exp_gp_kworker(void)
@@ -4111,7 +4119,7 @@ static void __init rcu_start_exp_gp_kworker(void)
const char *name = "rcu_exp_gp_kthread_worker";
struct sched_param param = { .sched_priority = kthread_prio };
- rcu_exp_gp_kworker = kthread_create_worker(0, name);
+ rcu_exp_gp_kworker = kthread_run_worker(0, name);
if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
pr_err("Failed to create %s!\n", name);
rcu_exp_gp_kworker = NULL;
@@ -4189,67 +4197,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
}
/*
- * Update kthreads affinity during CPU-hotplug changes.
- *
- * Set the per-rcu_node kthread's affinity to cover all CPUs that are
- * served by the rcu_node in question. The CPU hotplug lock is still
- * held, so the value of rnp->qsmaskinit will be stable.
- *
- * We don't include outgoingcpu in the affinity set, use -1 if there is
- * no outgoing CPU. If there are no CPUs left in the affinity set,
- * this function allows the kthread to execute on any CPU.
- *
- * Any future concurrent calls are serialized via ->kthread_mutex.
- */
-static void rcutree_affinity_setting(unsigned int cpu, int outgoingcpu)
-{
- cpumask_var_t cm;
- unsigned long mask;
- struct rcu_data *rdp;
- struct rcu_node *rnp;
- struct task_struct *task_boost, *task_exp;
-
- rdp = per_cpu_ptr(&rcu_data, cpu);
- rnp = rdp->mynode;
-
- task_boost = rcu_boost_task(rnp);
- task_exp = rcu_exp_par_gp_task(rnp);
-
- /*
- * If CPU is the boot one, those tasks are created later from early
- * initcall since kthreadd must be created first.
- */
- if (!task_boost && !task_exp)
- return;
-
- if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
- return;
-
- mutex_lock(&rnp->kthread_mutex);
- mask = rcu_rnp_online_cpus(rnp);
- for_each_leaf_node_possible_cpu(rnp, cpu)
- if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
- cpu != outgoingcpu)
- cpumask_set_cpu(cpu, cm);
- cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
- if (cpumask_empty(cm)) {
- cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
- if (outgoingcpu >= 0)
- cpumask_clear_cpu(outgoingcpu, cm);
- }
-
- if (task_exp)
- set_cpus_allowed_ptr(task_exp, cm);
-
- if (task_boost)
- set_cpus_allowed_ptr(task_boost, cm);
-
- mutex_unlock(&rnp->kthread_mutex);
-
- free_cpumask_var(cm);
-}
-
-/*
* Has the specified (known valid) CPU ever been fully online?
*/
bool rcu_cpu_beenfullyonline(int cpu)
@@ -4277,7 +4224,6 @@ int rcutree_online_cpu(unsigned int cpu)
if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
return 0; /* Too early in boot for scheduler work. */
sync_sched_exp_online_cleanup(cpu);
- rcutree_affinity_setting(cpu, -1);
// Stop-machine done, so allow nohz_full to disable tick.
tick_dep_clear(TICK_DEP_BIT_RCU);
@@ -4494,8 +4440,6 @@ int rcutree_offline_cpu(unsigned int cpu)
rnp->ffmask &= ~rdp->grpmask;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- rcutree_affinity_setting(cpu, cpu);
-
// nohz_full CPUs need the tick for stop-machine to work quickly
tick_dep_set(TICK_DEP_BIT_RCU);
return 0;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index bb7ca6eb9ef0..3600152b858e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1218,16 +1218,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+ rcu_thread_affine_rnp(t, rnp);
wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
}
-static struct task_struct *rcu_boost_task(struct rcu_node *rnp)
-{
- return READ_ONCE(rnp->boost_kthread_task);
-}
-
#else /* #ifdef CONFIG_RCU_BOOST */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@@ -1244,10 +1241,6 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
}
-static struct task_struct *rcu_boost_task(struct rcu_node *rnp)
-{
- return NULL;
-}
#endif /* #else #ifdef CONFIG_RCU_BOOST */
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4365b479e345..88a9a515b2ba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3538,7 +3538,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
*
* More yuck to audit.
*/
- do_set_cpus_allowed(p, task_cpu_possible_mask(p));
+ do_set_cpus_allowed(p, task_cpu_fallback_mask(p));
state = fail;
break;
case fail:
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 19813b387ef9..7fee43426ee7 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -5399,7 +5399,7 @@ static struct kthread_worker *scx_create_rt_helper(const char *name)
{
struct kthread_worker *helper;
- helper = kthread_create_worker(0, name);
+ helper = kthread_run_worker(0, name);
if (helper)
sched_set_fifo(helper->task);
return helper;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9362484a653c..33a23c7b2274 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -7840,7 +7840,7 @@ static void __init wq_cpu_intensive_thresh_init(void)
unsigned long thresh;
unsigned long bogo;
- pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release");
+ pwq_release_worker = kthread_run_worker(0, "pool_workqueue_release");
BUG_ON(IS_ERR(pwq_release_worker));
/* if the user set it to a specific value, keep it */
diff --git a/lib/test_objpool.c b/lib/test_objpool.c
index 5a3f6961a70f..896c0131c9a8 100644
--- a/lib/test_objpool.c
+++ b/lib/test_objpool.c
@@ -371,14 +371,10 @@ static int ot_start_sync(struct ot_test *test)
if (!cpu_online(cpu))
continue;
- work = kthread_create_on_node(ot_thread_worker, item,
- cpu_to_node(cpu), "ot_worker_%d", cpu);
- if (IS_ERR(work)) {
+ work = kthread_run_on_cpu(ot_thread_worker, item,
+ cpu, "ot_worker_%d");
+ if (IS_ERR(work))
pr_err("failed to create thread for cpu %d\n", cpu);
- } else {
- kthread_bind(work, cpu);
- wake_up_process(work);
- }
}
/* wait a while to make sure all threads waiting at start line */
@@ -562,14 +558,9 @@ static int ot_start_async(struct ot_test *test)
if (!cpu_online(cpu))
continue;
- work = kthread_create_on_node(ot_thread_worker, item,
- cpu_to_node(cpu), "ot_worker_%d", cpu);
- if (IS_ERR(work)) {
+ work = kthread_run_on_cpu(ot_thread_worker, item, cpu, "ot_worker_%d");
+ if (IS_ERR(work))
pr_err("failed to create thread for cpu %d\n", cpu);
- } else {
- kthread_bind(work, cpu);
- wake_up_process(work);
- }
}
/* wait a while to make sure all threads waiting at start line */
diff --git a/mm/compaction.c b/mm/compaction.c
index a2b16b08cbbf..a31c0f5758cf 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -3154,15 +3154,9 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx)
static int kcompactd(void *p)
{
pg_data_t *pgdat = (pg_data_t *)p;
- struct task_struct *tsk = current;
long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
long timeout = default_timeout;
- const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
-
- if (!cpumask_empty(cpumask))
- set_cpus_allowed_ptr(tsk, cpumask);
-
set_freezable();
pgdat->kcompactd_max_order = 0;
@@ -3233,10 +3227,12 @@ void __meminit kcompactd_run(int nid)
if (pgdat->kcompactd)
return;
- pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
+ pgdat->kcompactd = kthread_create_on_node(kcompactd, pgdat, nid, "kcompactd%d", nid);
if (IS_ERR(pgdat->kcompactd)) {
pr_err("Failed to start kcompactd on node %d\n", nid);
pgdat->kcompactd = NULL;
+ } else {
+ wake_up_process(pgdat->kcompactd);
}
}
@@ -3254,30 +3250,6 @@ void __meminit kcompactd_stop(int nid)
}
}
-/*
- * It's optimal to keep kcompactd on the same CPUs as their memory, but
- * not required for correctness. So if the last cpu in a node goes
- * away, we get changed to run anywhere: as the first one comes back,
- * restore their cpu bindings.
- */
-static int kcompactd_cpu_online(unsigned int cpu)
-{
- int nid;
-
- for_each_node_state(nid, N_MEMORY) {
- pg_data_t *pgdat = NODE_DATA(nid);
- const struct cpumask *mask;
-
- mask = cpumask_of_node(pgdat->node_id);
-
- if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
- /* One of our CPUs online: restore mask */
- if (pgdat->kcompactd)
- set_cpus_allowed_ptr(pgdat->kcompactd, mask);
- }
- return 0;
-}
-
static int proc_dointvec_minmax_warn_RT_change(const struct ctl_table *table,
int write, void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -3337,15 +3309,6 @@ static struct ctl_table vm_compaction[] = {
static int __init kcompactd_init(void)
{
int nid;
- int ret;
-
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "mm/compaction:online",
- kcompactd_cpu_online, NULL);
- if (ret < 0) {
- pr_err("kcompactd: failed to register hotplug callbacks.\n");
- return ret;
- }
for_each_node_state(nid, N_MEMORY)
kcompactd_run(nid);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b1ec5ece067e..867a2554434a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7192,10 +7192,6 @@ static int kswapd(void *p)
unsigned int highest_zoneidx = MAX_NR_ZONES - 1;
pg_data_t *pgdat = (pg_data_t *)p;
struct task_struct *tsk = current;
- const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
-
- if (!cpumask_empty(cpumask))
- set_cpus_allowed_ptr(tsk, cpumask);
/*
* Tell the memory management that we're a "memory allocator",
@@ -7364,13 +7360,15 @@ void __meminit kswapd_run(int nid)
pgdat_kswapd_lock(pgdat);
if (!pgdat->kswapd) {
- pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
+ pgdat->kswapd = kthread_create_on_node(kswapd, pgdat, nid, "kswapd%d", nid);
if (IS_ERR(pgdat->kswapd)) {
/* failure at boot is fatal */
pr_err("Failed to start kswapd on node %d,ret=%ld\n",
nid, PTR_ERR(pgdat->kswapd));
BUG_ON(system_state < SYSTEM_RUNNING);
pgdat->kswapd = NULL;
+ } else {
+ wake_up_process(pgdat->kswapd);
}
}
pgdat_kswapd_unlock(pgdat);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 281bbac5539d..c33d4bf17929 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -66,7 +66,7 @@ static int ksz_connect(struct dsa_switch *ds)
if (!priv)
return -ENOMEM;
- xmit_worker = kthread_create_worker(0, "dsa%d:%d_xmit",
+ xmit_worker = kthread_run_worker(0, "dsa%d:%d_xmit",
ds->dst->index, ds->index);
if (IS_ERR(xmit_worker)) {
ret = PTR_ERR(xmit_worker);
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 11ea8cfd6266..3929584791e4 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -110,7 +110,7 @@ static int ocelot_connect(struct dsa_switch *ds)
if (!priv)
return -ENOMEM;
- priv->xmit_worker = kthread_create_worker(0, "felix_xmit");
+ priv->xmit_worker = kthread_run_worker(0, "felix_xmit");
if (IS_ERR(priv->xmit_worker)) {
err = PTR_ERR(priv->xmit_worker);
kfree(priv);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 3e902af7eea6..02adec693811 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -707,7 +707,7 @@ static int sja1105_connect(struct dsa_switch *ds)
spin_lock_init(&priv->meta_lock);
- xmit_worker = kthread_create_worker(0, "dsa%d:%d_xmit",
+ xmit_worker = kthread_run_worker(0, "dsa%d:%d_xmit",
ds->dst->index, ds->index);
if (IS_ERR(xmit_worker)) {
err = PTR_ERR(xmit_worker);