summary | refs | log | tree | commit | diff
path: root/kernel/sched
diff options
context:
space:
mode:
author: Mel Gorman <mgorman@techsingularity.net> 2023-10-10 09:31:42 +0100
committer: Ingo Molnar <mingo@kernel.org> 2023-10-10 23:41:47 +0200
commit: b7a5b537c55c088d891ae554103d1b281abef781 (patch)
tree: c241efff414124021a2eb614afde84aeb2980340 /kernel/sched
parent: 2e2675db1906ac04809f5399bf1f5e30d56a6f3e (diff)
sched/numa: Complete scanning of partial VMAs regardless of PID activity
NUMA Balancing skips VMAs when the current task has not trapped a NUMA fault within the VMA. If the VMA is skipped then mm->numa_scan_offset advances and a task that is trapping faults within the VMA may never fully update PTEs within the VMA.

Force tasks to update PTEs for partially scanned VMAs. The VMA will be tagged for NUMA hints by some task but this removes some of the benefit of tracking PID activity within a VMA. A follow-on patch will mitigate this problem.

The test cases and machines evaluated did not trigger the corner case so the performance results are neutral with only small changes within the noise from normal test-to-test variance. However, the next patch makes the corner case easier to trigger.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Raghavendra K T <raghavendra.kt@amd.com>
Link: https://lore.kernel.org/r/20231010083143.19593-6-mgorman@techsingularity.net
Diffstat (limited to 'kernel/sched')
-rw-r--r-- kernel/sched/fair.c 18
1 files changed, 15 insertions, 3 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ce36969625bd..ab79013f6e91 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3113,7 +3113,7 @@ static void reset_ptenuma_scan(struct task_struct *p)
p->mm->numa_scan_offset = 0;
}
-static bool vma_is_accessed(struct vm_area_struct *vma)
+static bool vma_is_accessed(struct mm_struct *mm, struct vm_area_struct *vma)
{
unsigned long pids;
/*
@@ -3126,7 +3126,19 @@ static bool vma_is_accessed(struct vm_area_struct *vma)
return true;
pids = vma->numab_state->pids_active[0] | vma->numab_state->pids_active[1];
- return test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids);
+ if (test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids))
+ return true;
+
+ /*
+ * Complete a scan that has already started regardless of PID access, or
+ * some VMAs may never be scanned in multi-threaded applications:
+ */
+ if (mm->numa_scan_offset > vma->vm_start) {
+ trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_IGNORE_PID);
+ return true;
+ }
+
+ return false;
}
#define VMA_PID_RESET_PERIOD (4 * sysctl_numa_balancing_scan_delay)
@@ -3270,7 +3282,7 @@ static void task_numa_work(struct callback_head *work)
}
/* Do not scan the VMA if task has not accessed */
- if (!vma_is_accessed(vma)) {
+ if (!vma_is_accessed(mm, vma)) {
trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE);
continue;
}