summary | refs | log | tree | commit | diff
path: root/mm/damon/vaddr.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/damon/vaddr.c')
-rw-r--r-- mm/damon/vaddr.c | 699
1 files changed, 488 insertions, 211 deletions
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 89b6468da2b9..2750c88e7225 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -1,21 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * DAMON Primitives for Virtual Address Spaces
+ * DAMON Code for Virtual Address Spaces
*
- * Author: SeongJae Park <sjpark@amazon.de>
+ * Author: SeongJae Park <sj@kernel.org>
*/
#define pr_fmt(fmt) "damon-va: " fmt
-#include <asm-generic/mman-common.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
+#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
-#include "prmtv-common.h"
+#include "../internal.h"
+#include "ops-common.h"
#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
#undef DAMON_MIN_REGION
@@ -23,12 +24,12 @@
#endif
/*
- * 't->id' should be the pointer to the relevant 'struct pid' having reference
+ * 't->pid' should be the pointer to the relevant 'struct pid' having reference
* count. Caller must put the returned task, unless it is NULL.
*/
static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
{
- return get_pid_task((struct pid *)t->id, PIDTYPE_PID);
+ return get_pid_task(t->pid, PIDTYPE_PID);
}
/*
@@ -67,12 +68,16 @@ static int damon_va_evenly_split_region(struct damon_target *t,
unsigned long sz_orig, sz_piece, orig_end;
struct damon_region *n = NULL, *next;
unsigned long start;
+ unsigned int i;
if (!r || !nr_pieces)
return -EINVAL;
+ if (nr_pieces == 1)
+ return 0;
+
orig_end = r->ar.end;
- sz_orig = r->ar.end - r->ar.start;
+ sz_orig = damon_sz_region(r);
sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);
if (!sz_piece)
@@ -80,8 +85,7 @@ static int damon_va_evenly_split_region(struct damon_target *t,
r->ar.end = r->ar.start + sz_piece;
next = damon_next_region(r);
- for (start = r->ar.end; start + sz_piece <= orig_end;
- start += sz_piece) {
+ for (start = r->ar.end, i = 1; i < nr_pieces; start += sz_piece, i++) {
n = damon_new_region(start, start + sz_piece);
if (!n)
return -ENOMEM;
@@ -113,38 +117,41 @@ static unsigned long sz_range(struct damon_addr_range *r)
*
* Returns 0 if success, or negative error code otherwise.
*/
-static int __damon_va_three_regions(struct vm_area_struct *vma,
+static int __damon_va_three_regions(struct mm_struct *mm,
struct damon_addr_range regions[3])
{
- struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0};
- struct vm_area_struct *last_vma = NULL;
- unsigned long start = 0;
- struct rb_root rbroot;
+ struct damon_addr_range first_gap = {0}, second_gap = {0};
+ VMA_ITERATOR(vmi, mm, 0);
+ struct vm_area_struct *vma, *prev = NULL;
+ unsigned long start;
- /* Find two biggest gaps so that first_gap > second_gap > others */
- for (; vma; vma = vma->vm_next) {
- if (!last_vma) {
- start = vma->vm_start;
- goto next;
- }
+ /*
+ * Find the two biggest gaps so that first_gap > second_gap > others.
+ * If this is too slow, it can be optimised to examine the maple
+ * tree gaps.
+ */
+ rcu_read_lock();
+ for_each_vma(vmi, vma) {
+ unsigned long gap;
- if (vma->rb_subtree_gap <= sz_range(&second_gap)) {
- rbroot.rb_node = &vma->vm_rb;
- vma = rb_entry(rb_last(&rbroot),
- struct vm_area_struct, vm_rb);
+ if (!prev) {
+ start = vma->vm_start;
goto next;
}
-
- gap.start = last_vma->vm_end;
- gap.end = vma->vm_start;
- if (sz_range(&gap) > sz_range(&second_gap)) {
- swap(gap, second_gap);
- if (sz_range(&second_gap) > sz_range(&first_gap))
- swap(second_gap, first_gap);
+ gap = vma->vm_start - prev->vm_end;
+
+ if (gap > sz_range(&first_gap)) {
+ second_gap = first_gap;
+ first_gap.start = prev->vm_end;
+ first_gap.end = vma->vm_start;
+ } else if (gap > sz_range(&second_gap)) {
+ second_gap.start = prev->vm_end;
+ second_gap.end = vma->vm_start;
}
next:
- last_vma = vma;
+ prev = vma;
}
+ rcu_read_unlock();
if (!sz_range(&second_gap) || !sz_range(&first_gap))
return -EINVAL;
@@ -159,7 +166,7 @@ next:
regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
- regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION);
+ regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);
return 0;
}
@@ -180,7 +187,7 @@ static int damon_va_three_regions(struct damon_target *t,
return -EINVAL;
mmap_read_lock(mm);
- rc = __damon_va_three_regions(mm->mmap, regions);
+ rc = __damon_va_three_regions(mm, regions);
mmap_read_unlock(mm);
mmput(mm);
@@ -250,8 +257,8 @@ static void __damon_va_init_regions(struct damon_ctx *ctx,
for (i = 0; i < 3; i++)
sz += regions[i].end - regions[i].start;
- if (ctx->min_nr_regions)
- sz /= ctx->min_nr_regions;
+ if (ctx->attrs.min_nr_regions)
+ sz /= ctx->attrs.min_nr_regions;
if (sz < DAMON_MIN_REGION)
sz = DAMON_MIN_REGION;
@@ -282,77 +289,6 @@ static void damon_va_init(struct damon_ctx *ctx)
}
/*
- * Functions for the dynamic monitoring target regions update
- */
-
-/*
- * Check whether a region is intersecting an address range
- *
- * Returns true if it is.
- */
-static bool damon_intersect(struct damon_region *r,
- struct damon_addr_range *re)
-{
- return !(r->ar.end <= re->start || re->end <= r->ar.start);
-}
-
-/*
- * Update damon regions for the three big regions of the given target
- *
- * t the given target
- * bregions the three big regions of the target
- */
-static void damon_va_apply_three_regions(struct damon_target *t,
- struct damon_addr_range bregions[3])
-{
- struct damon_region *r, *next;
- unsigned int i;
-
- /* Remove regions which are not in the three big regions now */
- damon_for_each_region_safe(r, next, t) {
- for (i = 0; i < 3; i++) {
- if (damon_intersect(r, &bregions[i]))
- break;
- }
- if (i == 3)
- damon_destroy_region(r, t);
- }
-
- /* Adjust intersecting regions to fit with the three big regions */
- for (i = 0; i < 3; i++) {
- struct damon_region *first = NULL, *last;
- struct damon_region *newr;
- struct damon_addr_range *br;
-
- br = &bregions[i];
- /* Get the first and last regions which intersects with br */
- damon_for_each_region(r, t) {
- if (damon_intersect(r, br)) {
- if (!first)
- first = r;
- last = r;
- }
- if (r->ar.start >= br->end)
- break;
- }
- if (!first) {
- /* no damon_region intersects with this big region */
- newr = damon_new_region(
- ALIGN_DOWN(br->start,
- DAMON_MIN_REGION),
- ALIGN(br->end, DAMON_MIN_REGION));
- if (!newr)
- continue;
- damon_insert_region(newr, damon_prev_region(r), r, t);
- } else {
- first->ar.start = ALIGN_DOWN(br->start,
- DAMON_MIN_REGION);
- last->ar.end = ALIGN(br->end, DAMON_MIN_REGION);
- }
- }
-}
-
-/*
* Update regions for current memory mappings
*/
static void damon_va_update(struct damon_ctx *ctx)
@@ -363,7 +299,7 @@ static void damon_va_update(struct damon_ctx *ctx)
damon_for_each_target(t, ctx) {
if (damon_va_three_regions(t, three_regions))
continue;
- damon_va_apply_three_regions(t, three_regions);
+ damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION);
}
}
@@ -373,22 +309,22 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
pte_t *pte;
spinlock_t *ptl;
- if (pmd_huge(*pmd)) {
- ptl = pmd_lock(walk->mm, pmd);
- if (pmd_huge(*pmd)) {
- damon_pmdp_mkold(pmd, walk->mm, addr);
- spin_unlock(ptl);
- return 0;
- }
+ ptl = pmd_trans_huge_lock(pmd, walk->vma);
+ if (ptl) {
+ pmd_t pmde = pmdp_get(pmd);
+
+ if (pmd_present(pmde))
+ damon_pmdp_mkold(pmd, walk->vma, addr);
spin_unlock(ptl);
+ return 0;
}
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- return 0;
pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- if (!pte_present(*pte))
+ if (!pte)
+ return 0;
+ if (!pte_present(ptep_get(pte)))
goto out;
- damon_ptep_mkold(pte, walk->mm, addr);
+ damon_ptep_mkold(pte, walk->vma, addr);
out:
pte_unmap_unlock(pte, ptl);
return 0;
@@ -399,32 +335,27 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long addr)
{
bool referenced = false;
- pte_t entry = huge_ptep_get(pte);
- struct page *page = pte_page(entry);
+ pte_t entry = huge_ptep_get(mm, addr, pte);
+ struct folio *folio = pfn_folio(pte_pfn(entry));
+ unsigned long psize = huge_page_size(hstate_vma(vma));
- if (!page)
- return;
-
- get_page(page);
+ folio_get(folio);
if (pte_young(entry)) {
referenced = true;
entry = pte_mkold(entry);
- huge_ptep_set_access_flags(vma, addr, pte, entry,
- vma->vm_flags & VM_WRITE);
+ set_huge_pte_at(mm, addr, pte, entry, psize);
}
-#ifdef CONFIG_MMU_NOTIFIER
if (mmu_notifier_clear_young(mm, addr,
addr + huge_page_size(hstate_vma(vma))))
referenced = true;
-#endif /* CONFIG_MMU_NOTIFIER */
if (referenced)
- set_page_young(page);
+ folio_set_young(folio);
- set_page_idle(page);
- put_page(page);
+ folio_set_idle(folio);
+ folio_put(folio);
}
static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
@@ -436,7 +367,7 @@ static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
pte_t entry;
ptl = huge_pte_lock(h, walk->mm, pte);
- entry = huge_ptep_get(pte);
+ entry = huge_ptep_get(walk->mm, addr, pte);
if (!pte_present(entry))
goto out;
@@ -453,6 +384,7 @@ out:
static const struct mm_walk_ops damon_mkold_ops = {
.pmd_entry = damon_mkold_pmd_entry,
.hugetlb_entry = damon_mkold_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
};
static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -466,8 +398,8 @@ static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
* Functions for the access checking of the regions
*/
-static void __damon_va_prepare_access_check(struct damon_ctx *ctx,
- struct mm_struct *mm, struct damon_region *r)
+static void __damon_va_prepare_access_check(struct mm_struct *mm,
+ struct damon_region *r)
{
r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
@@ -485,13 +417,14 @@ static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
if (!mm)
continue;
damon_for_each_region(r, t)
- __damon_va_prepare_access_check(ctx, mm, r);
+ __damon_va_prepare_access_check(mm, r);
mmput(mm);
}
}
struct damon_young_walk_private {
- unsigned long *page_sz;
+ /* size of the folio for the access checked virtual memory address */
+ unsigned long *folio_sz;
bool young;
};
@@ -499,49 +432,45 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
pte_t *pte;
+ pte_t ptent;
spinlock_t *ptl;
- struct page *page;
+ struct folio *folio;
struct damon_young_walk_private *priv = walk->private;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_huge(*pmd)) {
- ptl = pmd_lock(walk->mm, pmd);
- if (!pmd_huge(*pmd)) {
- spin_unlock(ptl);
- goto regular_page;
- }
- page = damon_get_page(pmd_pfn(*pmd));
- if (!page)
+ ptl = pmd_trans_huge_lock(pmd, walk->vma);
+ if (ptl) {
+ pmd_t pmde = pmdp_get(pmd);
+
+ if (!pmd_present(pmde))
+ goto huge_out;
+ folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
+ if (!folio)
goto huge_out;
- if (pmd_young(*pmd) || !page_is_idle(page) ||
+ if (pmd_young(pmde) || !folio_test_idle(folio) ||
mmu_notifier_test_young(walk->mm,
- addr)) {
- *priv->page_sz = ((1UL) << HPAGE_PMD_SHIFT);
+ addr))
priv->young = true;
- }
- put_page(page);
+ *priv->folio_sz = HPAGE_PMD_SIZE;
huge_out:
spin_unlock(ptl);
return 0;
}
-
-regular_page:
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- return -EINVAL;
pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- if (!pte_present(*pte))
+ if (!pte)
+ return 0;
+ ptent = ptep_get(pte);
+ if (!pte_present(ptent))
goto out;
- page = damon_get_page(pte_pfn(*pte));
- if (!page)
+ folio = vm_normal_folio(walk->vma, addr, ptent);
+ if (!folio)
goto out;
- if (pte_young(*pte) || !page_is_idle(page) ||
- mmu_notifier_test_young(walk->mm, addr)) {
- *priv->page_sz = PAGE_SIZE;
+ if (pte_young(ptent) || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(walk->mm, addr))
priv->young = true;
- }
- put_page(page);
+ *priv->folio_sz = folio_size(folio);
out:
pte_unmap_unlock(pte, ptl);
return 0;
@@ -554,28 +483,24 @@ static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
{
struct damon_young_walk_private *priv = walk->private;
struct hstate *h = hstate_vma(walk->vma);
- struct page *page;
+ struct folio *folio;
spinlock_t *ptl;
pte_t entry;
ptl = huge_pte_lock(h, walk->mm, pte);
- entry = huge_ptep_get(pte);
+ entry = huge_ptep_get(walk->mm, addr, pte);
if (!pte_present(entry))
goto out;
- page = pte_page(entry);
- if (!page)
- goto out;
-
- get_page(page);
+ folio = pfn_folio(pte_pfn(entry));
+ folio_get(folio);
- if (pte_young(entry) || !page_is_idle(page) ||
- mmu_notifier_test_young(walk->mm, addr)) {
- *priv->page_sz = huge_page_size(h);
+ if (pte_young(entry) || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(walk->mm, addr))
priv->young = true;
- }
+ *priv->folio_sz = huge_page_size(h);
- put_page(page);
+ folio_put(folio);
out:
spin_unlock(ptl);
@@ -588,13 +513,14 @@ out:
static const struct mm_walk_ops damon_young_ops = {
.pmd_entry = damon_young_pmd_entry,
.hugetlb_entry = damon_young_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
};
static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
- unsigned long *page_sz)
+ unsigned long *folio_sz)
{
struct damon_young_walk_private arg = {
- .page_sz = page_sz,
+ .folio_sz = folio_sz,
.young = false,
};
@@ -610,27 +536,29 @@ static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
* mm 'mm_struct' for the given virtual address space
* r the region to be checked
*/
-static void __damon_va_check_access(struct damon_ctx *ctx,
- struct mm_struct *mm, struct damon_region *r)
+static void __damon_va_check_access(struct mm_struct *mm,
+ struct damon_region *r, bool same_target,
+ struct damon_attrs *attrs)
{
- static struct mm_struct *last_mm;
static unsigned long last_addr;
- static unsigned long last_page_sz = PAGE_SIZE;
+ static unsigned long last_folio_sz = PAGE_SIZE;
static bool last_accessed;
+ if (!mm) {
+ damon_update_region_access_rate(r, false, attrs);
+ return;
+ }
+
/* If the region is in the last checked page, reuse the result */
- if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) ==
- ALIGN_DOWN(r->sampling_addr, last_page_sz))) {
- if (last_accessed)
- r->nr_accesses++;
+ if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
+ ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
+ damon_update_region_access_rate(r, last_accessed, attrs);
return;
}
- last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz);
- if (last_accessed)
- r->nr_accesses++;
+ last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
+ damon_update_region_access_rate(r, last_accessed, attrs);
- last_mm = mm;
last_addr = r->sampling_addr;
}
@@ -640,28 +568,196 @@ static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
struct mm_struct *mm;
struct damon_region *r;
unsigned int max_nr_accesses = 0;
+ bool same_target;
damon_for_each_target(t, ctx) {
mm = damon_get_mm(t);
- if (!mm)
- continue;
+ same_target = false;
damon_for_each_region(r, t) {
- __damon_va_check_access(ctx, mm, r);
+ __damon_va_check_access(mm, r, same_target,
+ &ctx->attrs);
max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+ same_target = true;
}
- mmput(mm);
+ if (mm)
+ mmput(mm);
}
return max_nr_accesses;
}
+/*
+ * Check a folio against a 'young' DAMOS filter using the page table entry
+ * that maps it.
+ *
+ * The folio counts as young if the young bit of the given PTE (@ptep) or PMD
+ * (@pmdp) is set, if the folio is not idle, or if the MMU notifier reports
+ * @addr as young.  When it is young, the entry that was passed is made old
+ * again.
+ *
+ * Returns true if the youngness equals filter->matching.
+ */
+static bool damos_va_filter_young_match(struct damos_filter *filter,
+		struct folio *folio, struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, pmd_t *pmdp)
+{
+	bool accessed = false;
+
+	if (ptep)
+		accessed = pte_young(ptep_get(ptep));
+	else if (pmdp)
+		accessed = pmd_young(pmdp_get(pmdp));
+
+	/* Fall back to the folio idle flag, then to the MMU notifier */
+	if (!accessed)
+		accessed = !folio_test_idle(folio);
+	if (!accessed)
+		accessed = mmu_notifier_test_young(vma->vm_mm, addr);
+
+	if (accessed) {
+		if (ptep)
+			damon_ptep_mkold(ptep, vma, addr);
+		else if (pmdp)
+			damon_pmdp_mkold(pmdp, vma, addr);
+	}
+
+	return accessed == filter->matching;
+}
+
+/*
+ * Decide whether the operations layer filters of @scheme reject @folio.
+ *
+ * @ptep or @pmdp is the page table entry mapping @folio at @addr in @vma; it
+ * is only used for the 'young' filter type.
+ *
+ * Returns false right away if scheme->core_filters_allowed is set.  Otherwise
+ * the first matching filter decides (rejecting unless it is an allow filter),
+ * and scheme->ops_filters_default_reject is returned if no filter matches.
+ */
+static bool damos_va_filter_out(struct damos *scheme, struct folio *folio,
+		struct vm_area_struct *vma, unsigned long addr,
+		pte_t *ptep, pmd_t *pmdp)
+{
+	struct damos_filter *f;
+	bool hit;
+
+	if (scheme->core_filters_allowed)
+		return false;
+
+	damos_for_each_ops_filter(f, scheme) {
+		/*
+		 * damos_folio_filter_match checks the young filter by doing an
+		 * rmap on the folio to find its page table.  The vaddr scheme
+		 * already has the page table entry at hand, so check the young
+		 * filter through it instead.
+		 */
+		hit = f->type == DAMOS_FILTER_TYPE_YOUNG ?
+			damos_va_filter_young_match(f, folio, vma, addr,
+					ptep, pmdp) :
+			damos_folio_filter_match(f, folio);
+
+		if (hit)
+			return !f->allow;
+	}
+
+	return scheme->ops_filters_default_reject;
+}
+
+struct damos_va_migrate_private { /* private data of the migration page walk */
+ struct list_head *migration_lists; /* array of folio lists, one per migration destination */
+ struct damos *scheme; /* scheme providing the filters and the destinations */
+};
+
+/*
+ * Isolate the given folio and queue it on the entry of @migration_lists that
+ * corresponds to where the folio should be migrated.
+ *
+ * The destination is chosen from the interleave index of @addr in @vma and
+ * the weights in @dests.  The algorithm used here is similar to
+ * weighted_interleave_nid().  Nothing is queued if the weights sum to zero,
+ * if the folio is already on the chosen node, or if folio_isolate_lru()
+ * fails.
+ */
+static void damos_va_migrate_dests_add(struct folio *folio,
+		struct vm_area_struct *vma, unsigned long addr,
+		struct damos_migrate_dests *dests,
+		struct list_head *migration_lists)
+{
+	unsigned int weight_sum = 0;
+	unsigned int remainder;
+	pgoff_t ilx;
+	int order;
+	int idx = 0;
+
+	/*
+	 * If dests is empty, there is only one migration list corresponding
+	 * to s->target_nid, so the index stays 0 and no selection is needed.
+	 */
+	if (dests->nr_dests) {
+		order = folio_order(folio);
+		ilx = vma->vm_pgoff >> order;
+		ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);
+
+		for (idx = 0; idx < dests->nr_dests; idx++)
+			weight_sum += dests->weight_arr[idx];
+
+		/* If the total weights are somehow 0, don't migrate at all */
+		if (!weight_sum)
+			return;
+
+		/* Find the destination whose weight window holds the index */
+		remainder = ilx % weight_sum;
+		for (idx = 0; idx < dests->nr_dests; idx++) {
+			if (remainder < dests->weight_arr[idx])
+				break;
+			remainder -= dests->weight_arr[idx];
+		}
+
+		/* If the folio is already in the right node, leave it alone */
+		if (folio_nid(folio) == dests->node_id_arr[idx])
+			return;
+	}
+
+	if (folio_isolate_lru(folio))
+		list_add(&folio->lru, &migration_lists[idx]);
+}
+
+/*
+ * Page walk callback that collects the folios of one PMD range for migration.
+ *
+ * If @pmd maps a transparent huge page, the folio is handled as a whole under
+ * the PMD lock.  Otherwise every PTE in [@addr, @next) is visited under the
+ * PTE lock.  Folios rejected by the ops filters of the scheme are skipped; all
+ * others are passed to damos_va_migrate_dests_add().
+ *
+ * walk->private must point to a 'struct damos_va_migrate_private'.
+ *
+ * Always returns 0.
+ */
+static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damos_va_migrate_private *priv = walk->private;
+	struct list_head *migration_lists = priv->migration_lists;
+	struct damos *s = priv->scheme;
+	struct damos_migrate_dests *dests = &s->migrate_dests;
+	struct folio *folio;
+	spinlock_t *ptl;
+	pte_t *start_pte, *pte, ptent;
+	int nr;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	ptl = pmd_trans_huge_lock(pmd, walk->vma);
+	if (ptl) {
+		pmd_t pmde = pmdp_get(pmd);
+
+		if (!pmd_present(pmde))
+			goto huge_out;
+		folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
+		if (!folio)
+			goto huge_out;
+		if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd))
+			goto huge_out;
+		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
+				migration_lists);
+huge_out:
+		spin_unlock(ptl);
+		return 0;
+	}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+	start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	if (!pte)
+		return 0;
+
+	for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
+		/* Advance by one page unless a folio gets queued below */
+		nr = 1;
+		ptent = ptep_get(pte);
+
+		if (pte_none(ptent) || !pte_present(ptent))
+			continue;
+		folio = vm_normal_folio(walk->vma, addr, ptent);
+		if (!folio)
+			continue;
+		/*
+		 * A filtered-out folio must only be skipped.  Returning from
+		 * here would leave the PTE lock held and the PTE mapped,
+		 * because pte_unmap_unlock() below would never run.
+		 */
+		if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL))
+			continue;
+		damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
+				migration_lists);
+		nr = folio_nr_pages(folio);
+	}
+	pte_unmap_unlock(start_pte, ptl);
+	return 0;
+}
+
/*
* Functions for the target validity check and cleanup
*/
-bool damon_va_target_valid(void *target)
+static bool damon_va_target_valid(struct damon_target *t)
{
- struct damon_target *t = target;
struct task_struct *task;
task = damon_get_task_struct(t);
@@ -673,6 +769,11 @@ bool damon_va_target_valid(void *target)
return false;
}
+/* Drop the 'struct pid' reference that is kept in 't->pid' */
+static void damon_va_cleanup_target(struct damon_target *t)
+{
+	struct pid *pid = t->pid;
+
+	put_pid(pid);
+}
+
#ifndef CONFIG_ADVISE_SYSCALLS
static unsigned long damos_madvise(struct damon_target *target,
struct damon_region *r, int behavior)
@@ -685,7 +786,7 @@ static unsigned long damos_madvise(struct damon_target *target,
{
struct mm_struct *mm;
unsigned long start = PAGE_ALIGN(r->ar.start);
- unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start);
+ unsigned long len = PAGE_ALIGN(damon_sz_region(r));
unsigned long applied;
mm = damon_get_mm(target);
@@ -699,9 +800,157 @@ static unsigned long damos_madvise(struct damon_target *target,
}
#endif /* CONFIG_ADVISE_SYSCALLS */
+/*
+ * Migrate the folios mapped in region @r of @target as requested by @s.
+ *
+ * The page tables of the region are walked to sort the folios into one list
+ * per migration destination (a single list for s->target_nid if the scheme
+ * has no destinations), and each list is then given to
+ * damon_migrate_pages().
+ *
+ * @sz_filter_passed is not updated by this function.
+ *
+ * Returns the sum of the damon_migrate_pages() results multiplied by
+ * PAGE_SIZE, or 0 if the lists cannot be allocated or the mm of the target
+ * cannot be obtained.
+ */
+static unsigned long damos_va_migrate(struct damon_target *target,
+		struct damon_region *r, struct damos *s,
+		unsigned long *sz_filter_passed)
+{
+	struct damos_va_migrate_private priv;
+	struct mm_struct *mm;
+	int nr_dests;
+	int nid;
+	bool use_target_nid;
+	unsigned long applied = 0;
+	struct damos_migrate_dests *dests = &s->migrate_dests;
+	struct mm_walk_ops walk_ops = {
+		.pmd_entry = damos_va_migrate_pmd_entry,
+		.pte_entry = NULL,
+		.walk_lock = PGWALK_RDLOCK,
+	};
+
+	/* Without explicit destinations, everything goes to s->target_nid */
+	use_target_nid = dests->nr_dests == 0;
+	nr_dests = use_target_nid ? 1 : dests->nr_dests;
+	priv.scheme = s;
+	priv.migration_lists = kmalloc_array(nr_dests,
+			sizeof(*priv.migration_lists), GFP_KERNEL);
+	if (!priv.migration_lists)
+		return 0;
+
+	for (int i = 0; i < nr_dests; i++)
+		INIT_LIST_HEAD(&priv.migration_lists[i]);
+
+	mm = damon_get_mm(target);
+	if (!mm)
+		goto free_lists;
+
+	mmap_read_lock(mm);
+	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	for (int i = 0; i < nr_dests; i++) {
+		nid = use_target_nid ? s->target_nid : dests->node_id_arr[i];
+		applied += damon_migrate_pages(&priv.migration_lists[i], nid);
+		cond_resched();
+	}
+
+free_lists:
+	kfree(priv.migration_lists);
+	return applied * PAGE_SIZE;
+}
+
+struct damos_va_stat_private { /* private data of the DAMOS_STAT page walk */
+ struct damos *scheme; /* scheme providing the filters and 'last_applied' */
+ unsigned long *sz_filter_passed; /* accumulates the size of folios that pass the filters */
+};
+
+/*
+ * Returns true if @folio should not be accounted: it is NULL, or it is the
+ * folio recorded in s->last_applied.
+ */
+static inline bool damos_va_invalid_folio(struct folio *folio,
+		struct damos *s)
+{
+	if (!folio)
+		return true;
+	return folio == s->last_applied;
+}
+
+/*
+ * Page walk callback that accounts the folios of one PMD range which pass the
+ * ops filters of the scheme.
+ *
+ * For each folio that is neither NULL nor equal to s->last_applied, its size
+ * is added to the 'sz_filter_passed' counter unless the filters reject it,
+ * and the folio is recorded in s->last_applied.  A transparent huge page is
+ * handled under the PMD lock, everything else PTE by PTE under the PTE lock.
+ *
+ * walk->private must point to a 'struct damos_va_stat_private'.
+ *
+ * Always returns 0.
+ */
+static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damos_va_stat_private *priv = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+	unsigned long *passed = priv->sz_filter_passed;
+	struct damos *s = priv->scheme;
+	pte_t *first_pte, *pte, ptent;
+	struct folio *folio;
+	spinlock_t *ptl;
+	int nr;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	ptl = pmd_trans_huge_lock(pmd, vma);
+	if (ptl) {
+		pmd_t pmde = pmdp_get(pmd);
+
+		if (pmd_present(pmde)) {
+			folio = vm_normal_folio_pmd(vma, addr, pmde);
+
+			if (!damos_va_invalid_folio(folio, s)) {
+				if (!damos_va_filter_out(s, folio, vma, addr,
+							NULL, pmd))
+					*passed += folio_size(folio);
+				s->last_applied = folio;
+			}
+		}
+		spin_unlock(ptl);
+		return 0;
+	}
+#endif
+	first_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	if (!first_pte)
+		return 0;
+
+	pte = first_pte;
+	while (addr < next) {
+		/* Step by a single page unless a folio is accounted below */
+		nr = 1;
+		ptent = ptep_get(pte);
+
+		if (!pte_none(ptent) && pte_present(ptent)) {
+			folio = vm_normal_folio(vma, addr, ptent);
+
+			if (!damos_va_invalid_folio(folio, s)) {
+				if (!damos_va_filter_out(s, folio, vma, addr,
+							pte, NULL))
+					*passed += folio_size(folio);
+				nr = folio_nr_pages(folio);
+				s->last_applied = folio;
+			}
+		}
+
+		pte += nr;
+		addr += nr * PAGE_SIZE;
+	}
+	pte_unmap_unlock(first_pte, ptl);
+	return 0;
+}
+
+/*
+ * Handle DAMOS_STAT for region @r of @target.
+ *
+ * If scheme @s has operations layer filters, the page tables of the region
+ * are walked with damos_va_stat_pmd_entry(), which accumulates into
+ * @sz_filter_passed.  Nothing is done if the scheme has no such filter or if
+ * the mm of the target cannot be obtained.
+ *
+ * Always returns 0.
+ */
+static unsigned long damos_va_stat(struct damon_target *target,
+		struct damon_region *r, struct damos *s,
+		unsigned long *sz_filter_passed)
+{
+	struct mm_walk_ops walk_ops = {
+		.pmd_entry = damos_va_stat_pmd_entry,
+		.walk_lock = PGWALK_RDLOCK,
+	};
+	struct damos_va_stat_private priv = {
+		.scheme = s,
+		.sz_filter_passed = sz_filter_passed,
+	};
+	struct mm_struct *mm;
+
+	if (!damos_ops_has_filter(s))
+		return 0;
+
+	mm = damon_get_mm(target);
+	if (mm) {
+		mmap_read_lock(mm);
+		walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
+		mmap_read_unlock(mm);
+		mmput(mm);
+	}
+	return 0;
+}
+
static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
struct damon_target *t, struct damon_region *r,
- struct damos *scheme)
+ struct damos *scheme, unsigned long *sz_filter_passed)
{
int madv_action;
@@ -721,9 +970,15 @@ static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
case DAMOS_NOHUGEPAGE:
madv_action = MADV_NOHUGEPAGE;
break;
+ case DAMOS_MIGRATE_HOT:
+ case DAMOS_MIGRATE_COLD:
+ return damos_va_migrate(t, r, scheme, sz_filter_passed);
case DAMOS_STAT:
- return 0;
+ return damos_va_stat(t, r, scheme, sz_filter_passed);
default:
+ /*
+ * DAMOS actions that are not yet supported by 'vaddr'.
+ */
return 0;
}
@@ -737,7 +992,11 @@ static int damon_va_scheme_score(struct damon_ctx *context,
switch (scheme->action) {
case DAMOS_PAGEOUT:
- return damon_pageout_score(context, r, scheme);
+ return damon_cold_score(context, r, scheme);
+ case DAMOS_MIGRATE_HOT:
+ return damon_hot_score(context, r, scheme);
+ case DAMOS_MIGRATE_COLD:
+ return damon_cold_score(context, r, scheme);
default:
break;
}
@@ -745,17 +1004,35 @@ static int damon_va_scheme_score(struct damon_ctx *context,
return DAMOS_MAX_SCORE;
}
-void damon_va_set_primitives(struct damon_ctx *ctx)
+/*
+ * Register the DAMON_OPS_VADDR and DAMON_OPS_FVADDR operations sets.
+ *
+ * Both sets share the same callbacks, except that the DAMON_OPS_FVADDR set
+ * has no 'init' and 'update' callbacks.
+ *
+ * Returns the error of the first damon_register_ops() call if it fails,
+ * otherwise the result of the second call.
+ */
+static int __init damon_va_initcall(void)
{
-	ctx->primitive.init = damon_va_init;
-	ctx->primitive.update = damon_va_update;
-	ctx->primitive.prepare_access_checks = damon_va_prepare_access_checks;
-	ctx->primitive.check_accesses = damon_va_check_accesses;
-	ctx->primitive.reset_aggregated = NULL;
-	ctx->primitive.target_valid = damon_va_target_valid;
-	ctx->primitive.cleanup = NULL;
-	ctx->primitive.apply_scheme = damon_va_apply_scheme;
-	ctx->primitive.get_scheme_score = damon_va_scheme_score;
-}
+	struct damon_operations ops = {
+		.id = DAMON_OPS_VADDR,
+		.init = damon_va_init,
+		.update = damon_va_update,
+		.prepare_access_checks = damon_va_prepare_access_checks,
+		.check_accesses = damon_va_check_accesses,
+		.target_valid = damon_va_target_valid,
+		.cleanup_target = damon_va_cleanup_target,
+		.cleanup = NULL,
+		.apply_scheme = damon_va_apply_scheme,
+		.get_scheme_score = damon_va_scheme_score,
+	};
+	/* ops for fixed virtual address ranges */
+	struct damon_operations ops_fvaddr = ops;
+	int err;
+
+	/* Don't set the monitoring target regions for the entire mapping */
+	ops_fvaddr.id = DAMON_OPS_FVADDR;
+	ops_fvaddr.init = NULL;
+	ops_fvaddr.update = NULL;
+
+	err = damon_register_ops(&ops);
+	if (err)
+		return err;
+	return damon_register_ops(&ops_fvaddr);
+}
+
+subsys_initcall(damon_va_initcall);
-#include "vaddr-test.h"
+#include "tests/vaddr-kunit.h"