summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/ext_idle.c48
1 files changed, 44 insertions, 4 deletions
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 5d6253c6ed90..f39b34fbb8a6 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -49,6 +49,7 @@ static struct scx_idle_cpus **scx_idle_node_masks;
/*
* Local per-CPU cpumasks (used to generate temporary idle cpumasks).
*/
+static DEFINE_PER_CPU(cpumask_var_t, local_idle_cpumask);
static DEFINE_PER_CPU(cpumask_var_t, local_llc_idle_cpumask);
static DEFINE_PER_CPU(cpumask_var_t, local_numa_idle_cpumask);
@@ -417,13 +418,15 @@ static inline bool task_affinity_all(const struct task_struct *p)
* branch prediction optimizations.
*
* 3. Pick a CPU within the same LLC (Last-Level Cache):
- * - if the above conditions aren't met, pick a CPU that shares the same LLC
- * to maintain cache locality.
+ * - if the above conditions aren't met, pick a CPU that shares the same
+ * LLC, if the LLC domain is a subset of @cpus_allowed, to maintain
+ * cache locality.
*
* 4. Pick a CPU within the same NUMA node, if enabled:
- * - choose a CPU from the same NUMA node to reduce memory access latency.
+ * - choose a CPU from the same NUMA node, if the node cpumask is a
+ * subset of @cpus_allowed, to reduce memory access latency.
*
- * 5. Pick any idle CPU usable by the task.
+ * 5. Pick any idle CPU within the @cpus_allowed domain.
*
* Step 3 and 4 are performed only if the system has, respectively,
* multiple LLCs / multiple NUMA nodes (see scx_selcpu_topo_llc and
@@ -446,6 +449,39 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
int node = scx_cpu_node_if_enabled(prev_cpu);
s32 cpu;
+ preempt_disable();
+
+ /*
+ * Determine the subset of CPUs usable by @p within @cpus_allowed.
+ */
+ if (allowed != p->cpus_ptr) {
+ struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_idle_cpumask);
+
+ if (task_affinity_all(p)) {
+ allowed = cpus_allowed;
+ } else if (cpumask_and(local_cpus, cpus_allowed, p->cpus_ptr)) {
+ allowed = local_cpus;
+ } else {
+ cpu = -EBUSY;
+ goto out_enable;
+ }
+
+ /*
+ * If @prev_cpu is not in the allowed CPUs, skip topology
+ * optimizations and try to pick any idle CPU usable by the
+ * task.
+ *
+ * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
+ * the current node, as it may optimize some waker->wakee
+ * workloads.
+ */
+ if (!cpumask_test_cpu(prev_cpu, allowed)) {
+ node = scx_cpu_node_if_enabled(smp_processor_id());
+ cpu = scx_pick_idle_cpu(allowed, node, flags);
+ goto out_enable;
+ }
+ }
+
/*
* This is necessary to protect llc_cpus.
*/
@@ -610,6 +646,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
out_unlock:
rcu_read_unlock();
+out_enable:
+ preempt_enable();
return cpu;
}
@@ -641,6 +679,8 @@ void scx_idle_init_masks(void)
/* Allocate local per-cpu idle cpumasks */
for_each_possible_cpu(i) {
+ BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_idle_cpumask, i),
+ GFP_KERNEL, cpu_to_node(i)));
BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_llc_idle_cpumask, i),
GFP_KERNEL, cpu_to_node(i)));
BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_numa_idle_cpumask, i),