summaryrefslogtreecommitdiff
path: root/kernel/sched/fair.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--kernel/sched/fair.c55
1 files changed, 52 insertions, 3 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ab79013f6e91..922905194c0c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3158,6 +3158,8 @@ static void task_numa_work(struct callback_head *work)
unsigned long nr_pte_updates = 0;
long pages, virtpages;
struct vma_iterator vmi;
+ bool vma_pids_skipped;
+ bool vma_pids_forced = false;
SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
@@ -3200,7 +3202,6 @@ static void task_numa_work(struct callback_head *work)
*/
p->node_stamp += 2 * TICK_NSEC;
- start = mm->numa_scan_offset;
pages = sysctl_numa_balancing_scan_size;
pages <<= 20 - PAGE_SHIFT; /* MB in pages */
virtpages = pages * 8; /* Scan up to this much virtual space */
@@ -3210,6 +3211,16 @@ static void task_numa_work(struct callback_head *work)
if (!mmap_read_trylock(mm))
return;
+
+ /*
+ * VMAs are skipped if the current PID has not trapped a fault within
+ * the VMA recently. Allow scanning to be forced if there is no
+ * suitable VMA remaining.
+ */
+ vma_pids_skipped = false;
+
+retry_pids:
+ start = mm->numa_scan_offset;
vma_iter_init(&vmi, mm, start);
vma = vma_next(&vmi);
if (!vma) {
@@ -3260,6 +3271,13 @@ static void task_numa_work(struct callback_head *work)
/* Reset happens after 4 times scan delay of scan start */
vma->numab_state->pids_active_reset = vma->numab_state->next_scan +
msecs_to_jiffies(VMA_PID_RESET_PERIOD);
+
+ /*
+ * Ensure prev_scan_seq does not match numa_scan_seq,
+ * to prevent VMAs being skipped prematurely on the
+ * first scan:
+ */
+ vma->numab_state->prev_scan_seq = mm->numa_scan_seq - 1;
}
/*
@@ -3281,8 +3299,19 @@ static void task_numa_work(struct callback_head *work)
vma->numab_state->pids_active[1] = 0;
}
- /* Do not scan the VMA if task has not accessed */
- if (!vma_is_accessed(mm, vma)) {
+ /* Do not rescan VMAs twice within the same sequence. */
+ if (vma->numab_state->prev_scan_seq == mm->numa_scan_seq) {
+ mm->numa_scan_offset = vma->vm_end;
+ trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SEQ_COMPLETED);
+ continue;
+ }
+
+ /*
+ * Do not scan the VMA if task has not accessed it, unless no other
+ * VMA candidate exists.
+ */
+ if (!vma_pids_forced && !vma_is_accessed(mm, vma)) {
+ vma_pids_skipped = true;
trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE);
continue;
}
@@ -3311,8 +3340,28 @@ static void task_numa_work(struct callback_head *work)
cond_resched();
} while (end != vma->vm_end);
+
+ /* VMA scan is complete, do not scan until next sequence. */
+ vma->numab_state->prev_scan_seq = mm->numa_scan_seq;
+
+ /*
+ * Only force scan within one VMA at a time, to limit the
+ * cost of scanning a potentially uninteresting VMA.
+ */
+ if (vma_pids_forced)
+ break;
} for_each_vma(vmi, vma);
+ /*
+ * If no VMAs are remaining and VMAs were skipped due to the PID
+ * not accessing the VMA previously, then force a scan to ensure
+ * forward progress:
+ */
+ if (!vma && !vma_pids_forced && vma_pids_skipped) {
+ vma_pids_forced = true;
+ goto retry_pids;
+ }
+
out:
/*
* It is possible to reach the end of the VMA list but the last few