diff options
Diffstat (limited to 'mm/damon/vaddr.c')
| -rw-r--r-- | mm/damon/vaddr.c | 699 |
1 files changed, 488 insertions, 211 deletions
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 89b6468da2b9..2750c88e7225 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -1,21 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 /* - * DAMON Primitives for Virtual Address Spaces + * DAMON Code for Virtual Address Spaces * - * Author: SeongJae Park <sjpark@amazon.de> + * Author: SeongJae Park <sj@kernel.org> */ #define pr_fmt(fmt) "damon-va: " fmt -#include <asm-generic/mman-common.h> #include <linux/highmem.h> #include <linux/hugetlb.h> +#include <linux/mman.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> #include <linux/pagewalk.h> #include <linux/sched/mm.h> -#include "prmtv-common.h" +#include "../internal.h" +#include "ops-common.h" #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST #undef DAMON_MIN_REGION @@ -23,12 +24,12 @@ #endif /* - * 't->id' should be the pointer to the relevant 'struct pid' having reference + * 't->pid' should be the pointer to the relevant 'struct pid' having reference * count. Caller must put the returned task, unless it is NULL. */ static inline struct task_struct *damon_get_task_struct(struct damon_target *t) { - return get_pid_task((struct pid *)t->id, PIDTYPE_PID); + return get_pid_task(t->pid, PIDTYPE_PID); } /* @@ -67,12 +68,16 @@ static int damon_va_evenly_split_region(struct damon_target *t, unsigned long sz_orig, sz_piece, orig_end; struct damon_region *n = NULL, *next; unsigned long start; + unsigned int i; if (!r || !nr_pieces) return -EINVAL; + if (nr_pieces == 1) + return 0; + orig_end = r->ar.end; - sz_orig = r->ar.end - r->ar.start; + sz_orig = damon_sz_region(r); sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); if (!sz_piece) @@ -80,8 +85,7 @@ static int damon_va_evenly_split_region(struct damon_target *t, r->ar.end = r->ar.start + sz_piece; next = damon_next_region(r); - for (start = r->ar.end; start + sz_piece <= orig_end; - start += sz_piece) { + for (start = r->ar.end, i = 1; i < nr_pieces; start += sz_piece, i++) { n = damon_new_region(start, start + sz_piece); if (!n) return -ENOMEM; @@ -113,38 +117,41 @@ static unsigned long sz_range(struct damon_addr_range *r) * * Returns 0 if success, or negative error code otherwise. */ -static int __damon_va_three_regions(struct vm_area_struct *vma, +static int __damon_va_three_regions(struct mm_struct *mm, struct damon_addr_range regions[3]) { - struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0}; - struct vm_area_struct *last_vma = NULL; - unsigned long start = 0; - struct rb_root rbroot; + struct damon_addr_range first_gap = {0}, second_gap = {0}; + VMA_ITERATOR(vmi, mm, 0); + struct vm_area_struct *vma, *prev = NULL; + unsigned long start; - /* Find two biggest gaps so that first_gap > second_gap > others */ - for (; vma; vma = vma->vm_next) { - if (!last_vma) { - start = vma->vm_start; - goto next; - } + /* + * Find the two biggest gaps so that first_gap > second_gap > others. + * If this is too slow, it can be optimised to examine the maple + * tree gaps. + */ + rcu_read_lock(); + for_each_vma(vmi, vma) { + unsigned long gap; - if (vma->rb_subtree_gap <= sz_range(&second_gap)) { - rbroot.rb_node = &vma->vm_rb; - vma = rb_entry(rb_last(&rbroot), - struct vm_area_struct, vm_rb); + if (!prev) { + start = vma->vm_start; goto next; } - - gap.start = last_vma->vm_end; - gap.end = vma->vm_start; - if (sz_range(&gap) > sz_range(&second_gap)) { - swap(gap, second_gap); - if (sz_range(&second_gap) > sz_range(&first_gap)) - swap(second_gap, first_gap); + gap = vma->vm_start - prev->vm_end; + + if (gap > sz_range(&first_gap)) { + second_gap = first_gap; + first_gap.start = prev->vm_end; + first_gap.end = vma->vm_start; + } else if (gap > sz_range(&second_gap)) { + second_gap.start = prev->vm_end; + second_gap.end = vma->vm_start; } next: - last_vma = vma; + prev = vma; } + rcu_read_unlock(); if (!sz_range(&second_gap) || !sz_range(&first_gap)) return -EINVAL; @@ -159,7 +166,7 @@ next: regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); - regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION); + regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION); return 0; } @@ -180,7 +187,7 @@ static int damon_va_three_regions(struct damon_target *t, return -EINVAL; mmap_read_lock(mm); - rc = __damon_va_three_regions(mm->mmap, regions); + rc = __damon_va_three_regions(mm, regions); mmap_read_unlock(mm); mmput(mm); @@ -250,8 +257,8 @@ static void __damon_va_init_regions(struct damon_ctx *ctx, for (i = 0; i < 3; i++) sz += regions[i].end - regions[i].start; - if (ctx->min_nr_regions) - sz /= ctx->min_nr_regions; + if (ctx->attrs.min_nr_regions) + sz /= ctx->attrs.min_nr_regions; if (sz < DAMON_MIN_REGION) sz = DAMON_MIN_REGION; @@ -282,77 +289,6 @@ static void damon_va_init(struct damon_ctx *ctx) } /* - * Functions for the dynamic monitoring target regions update - */ - -/* - * Check whether a region is intersecting an address range - * - * Returns true if it is. - */ -static bool damon_intersect(struct damon_region *r, - struct damon_addr_range *re) -{ - return !(r->ar.end <= re->start || re->end <= r->ar.start); -} - -/* - * Update damon regions for the three big regions of the given target - * - * t the given target - * bregions the three big regions of the target - */ -static void damon_va_apply_three_regions(struct damon_target *t, - struct damon_addr_range bregions[3]) -{ - struct damon_region *r, *next; - unsigned int i; - - /* Remove regions which are not in the three big regions now */ - damon_for_each_region_safe(r, next, t) { - for (i = 0; i < 3; i++) { - if (damon_intersect(r, &bregions[i])) - break; - } - if (i == 3) - damon_destroy_region(r, t); - } - - /* Adjust intersecting regions to fit with the three big regions */ - for (i = 0; i < 3; i++) { - struct damon_region *first = NULL, *last; - struct damon_region *newr; - struct damon_addr_range *br; - - br = &bregions[i]; - /* Get the first and last regions which intersects with br */ - damon_for_each_region(r, t) { - if (damon_intersect(r, br)) { - if (!first) - first = r; - last = r; - } - if (r->ar.start >= br->end) - break; - } - if (!first) { - /* no damon_region intersects with this big region */ - newr = damon_new_region( - ALIGN_DOWN(br->start, - DAMON_MIN_REGION), - ALIGN(br->end, DAMON_MIN_REGION)); - if (!newr) - continue; - damon_insert_region(newr, damon_prev_region(r), r, t); - } else { - first->ar.start = ALIGN_DOWN(br->start, - DAMON_MIN_REGION); - last->ar.end = ALIGN(br->end, DAMON_MIN_REGION); - } - } -} - -/* * Update regions for current memory mappings */ static void damon_va_update(struct damon_ctx *ctx) @@ -363,7 +299,7 @@ static void damon_va_update(struct damon_ctx *ctx) damon_for_each_target(t, ctx) { if (damon_va_three_regions(t, three_regions)) continue; - damon_va_apply_three_regions(t, three_regions); + damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION); } } @@ -373,22 +309,22 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, pte_t *pte; spinlock_t *ptl; - if (pmd_huge(*pmd)) { - ptl = pmd_lock(walk->mm, pmd); - if (pmd_huge(*pmd)) { - damon_pmdp_mkold(pmd, walk->mm, addr); - spin_unlock(ptl); - return 0; - } + ptl = pmd_trans_huge_lock(pmd, walk->vma); + if (ptl) { + pmd_t pmde = pmdp_get(pmd); + + if (pmd_present(pmde)) + damon_pmdp_mkold(pmd, walk->vma, addr); spin_unlock(ptl); + return 0; } - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return 0; pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - if (!pte_present(*pte)) + if (!pte) + return 0; + if (!pte_present(ptep_get(pte))) goto out; - damon_ptep_mkold(pte, walk->mm, addr); + damon_ptep_mkold(pte, walk->vma, addr); out: pte_unmap_unlock(pte, ptl); return 0; @@ -399,32 +335,27 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) { bool referenced = false; - pte_t entry = huge_ptep_get(pte); - struct page *page = pte_page(entry); + pte_t entry = huge_ptep_get(mm, addr, pte); + struct folio *folio = pfn_folio(pte_pfn(entry)); + unsigned long psize = huge_page_size(hstate_vma(vma)); - if (!page) - return; - - get_page(page); + folio_get(folio); if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - huge_ptep_set_access_flags(vma, addr, pte, entry, - vma->vm_flags & VM_WRITE); + set_huge_pte_at(mm, addr, pte, entry, psize); } -#ifdef CONFIG_MMU_NOTIFIER if (mmu_notifier_clear_young(mm, addr, addr + huge_page_size(hstate_vma(vma)))) referenced = true; -#endif /* CONFIG_MMU_NOTIFIER */ if (referenced) - set_page_young(page); + folio_set_young(folio); - set_page_idle(page); - put_page(page); + folio_set_idle(folio); + folio_put(folio); } static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, @@ -436,7 +367,7 @@ static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, pte_t entry; ptl = huge_pte_lock(h, walk->mm, pte); - entry = huge_ptep_get(pte); + entry = huge_ptep_get(walk->mm, addr, pte); if (!pte_present(entry)) goto out; @@ -453,6 +384,7 @@ out: static const struct mm_walk_ops damon_mkold_ops = { .pmd_entry = damon_mkold_pmd_entry, .hugetlb_entry = damon_mkold_hugetlb_entry, + .walk_lock = PGWALK_RDLOCK, }; static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) @@ -466,8 +398,8 @@ static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) * Functions for the access checking of the regions */ -static void __damon_va_prepare_access_check(struct damon_ctx *ctx, - struct mm_struct *mm, struct damon_region *r) +static void __damon_va_prepare_access_check(struct mm_struct *mm, + struct damon_region *r) { r->sampling_addr = damon_rand(r->ar.start, r->ar.end); @@ -485,13 +417,14 @@ static void damon_va_prepare_access_checks(struct damon_ctx *ctx) if (!mm) continue; damon_for_each_region(r, t) - __damon_va_prepare_access_check(ctx, mm, r); + __damon_va_prepare_access_check(mm, r); mmput(mm); } } struct damon_young_walk_private { - unsigned long *page_sz; + /* size of the folio for the access checked virtual memory address */ + unsigned long *folio_sz; bool young; }; @@ -499,49 +432,45 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { pte_t *pte; + pte_t ptent; spinlock_t *ptl; - struct page *page; + struct folio *folio; struct damon_young_walk_private *priv = walk->private; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_huge(*pmd)) { - ptl = pmd_lock(walk->mm, pmd); - if (!pmd_huge(*pmd)) { - spin_unlock(ptl); - goto regular_page; - } - page = damon_get_page(pmd_pfn(*pmd)); - if (!page) + ptl = pmd_trans_huge_lock(pmd, walk->vma); + if (ptl) { + pmd_t pmde = pmdp_get(pmd); + + if (!pmd_present(pmde)) + goto huge_out; + folio = vm_normal_folio_pmd(walk->vma, addr, pmde); + if (!folio) goto huge_out; - if (pmd_young(*pmd) || !page_is_idle(page) || + if (pmd_young(pmde) || !folio_test_idle(folio) || mmu_notifier_test_young(walk->mm, - addr)) { - *priv->page_sz = ((1UL) << HPAGE_PMD_SHIFT); + addr)) priv->young = true; - } - put_page(page); + *priv->folio_sz = HPAGE_PMD_SIZE; huge_out: spin_unlock(ptl); return 0; } - -regular_page: #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return -EINVAL; pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - if (!pte_present(*pte)) + if (!pte) + return 0; + ptent = ptep_get(pte); + if (!pte_present(ptent)) goto out; - page = damon_get_page(pte_pfn(*pte)); - if (!page) + folio = vm_normal_folio(walk->vma, addr, ptent); + if (!folio) goto out; - if (pte_young(*pte) || !page_is_idle(page) || - mmu_notifier_test_young(walk->mm, addr)) { - *priv->page_sz = PAGE_SIZE; + if (pte_young(ptent) || !folio_test_idle(folio) || + mmu_notifier_test_young(walk->mm, addr)) priv->young = true; - } - put_page(page); + *priv->folio_sz = folio_size(folio); out: pte_unmap_unlock(pte, ptl); return 0; @@ -554,28 +483,24 @@ static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, { struct damon_young_walk_private *priv = walk->private; struct hstate *h = hstate_vma(walk->vma); - struct page *page; + struct folio *folio; spinlock_t *ptl; pte_t entry; ptl = huge_pte_lock(h, walk->mm, pte); - entry = huge_ptep_get(pte); + entry = huge_ptep_get(walk->mm, addr, pte); if (!pte_present(entry)) goto out; - page = pte_page(entry); - if (!page) - goto out; - - get_page(page); + folio = pfn_folio(pte_pfn(entry)); + folio_get(folio); - if (pte_young(entry) || !page_is_idle(page) || - mmu_notifier_test_young(walk->mm, addr)) { - *priv->page_sz = huge_page_size(h); + if (pte_young(entry) || !folio_test_idle(folio) || + mmu_notifier_test_young(walk->mm, addr)) priv->young = true; - } + *priv->folio_sz = huge_page_size(h); - put_page(page); + folio_put(folio); out: spin_unlock(ptl); @@ -588,13 +513,14 @@ out: static const struct mm_walk_ops damon_young_ops = { .pmd_entry = damon_young_pmd_entry, .hugetlb_entry = damon_young_hugetlb_entry, + .walk_lock = PGWALK_RDLOCK, }; static bool damon_va_young(struct mm_struct *mm, unsigned long addr, - unsigned long *page_sz) + unsigned long *folio_sz) { struct damon_young_walk_private arg = { - .page_sz = page_sz, + .folio_sz = folio_sz, .young = false, }; @@ -610,27 +536,29 @@ static bool damon_va_young(struct mm_struct *mm, unsigned long addr, * mm 'mm_struct' for the given virtual address space * r the region to be checked */ -static void __damon_va_check_access(struct damon_ctx *ctx, - struct mm_struct *mm, struct damon_region *r) +static void __damon_va_check_access(struct mm_struct *mm, + struct damon_region *r, bool same_target, + struct damon_attrs *attrs) { - static struct mm_struct *last_mm; static unsigned long last_addr; - static unsigned long last_page_sz = PAGE_SIZE; + static unsigned long last_folio_sz = PAGE_SIZE; static bool last_accessed; + if (!mm) { + damon_update_region_access_rate(r, false, attrs); + return; + } + /* If the region is in the last checked page, reuse the result */ - if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) == - ALIGN_DOWN(r->sampling_addr, last_page_sz))) { - if (last_accessed) - r->nr_accesses++; + if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) == + ALIGN_DOWN(r->sampling_addr, last_folio_sz))) { + damon_update_region_access_rate(r, last_accessed, attrs); return; } - last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz); - if (last_accessed) - r->nr_accesses++; + last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz); + damon_update_region_access_rate(r, last_accessed, attrs); - last_mm = mm; last_addr = r->sampling_addr; } @@ -640,28 +568,196 @@ static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) struct mm_struct *mm; struct damon_region *r; unsigned int max_nr_accesses = 0; + bool same_target; damon_for_each_target(t, ctx) { mm = damon_get_mm(t); - if (!mm) - continue; + same_target = false; damon_for_each_region(r, t) { - __damon_va_check_access(ctx, mm, r); + __damon_va_check_access(mm, r, same_target, + &ctx->attrs); max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + same_target = true; } - mmput(mm); + if (mm) + mmput(mm); } return max_nr_accesses; } +static bool damos_va_filter_young_match(struct damos_filter *filter, + struct folio *folio, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, pmd_t *pmdp) +{ + bool young = false; + + if (ptep) + young = pte_young(ptep_get(ptep)); + else if (pmdp) + young = pmd_young(pmdp_get(pmdp)); + + young = young || !folio_test_idle(folio) || + mmu_notifier_test_young(vma->vm_mm, addr); + + if (young && ptep) + damon_ptep_mkold(ptep, vma, addr); + else if (young && pmdp) + damon_pmdp_mkold(pmdp, vma, addr); + + return young == filter->matching; +} + +static bool damos_va_filter_out(struct damos *scheme, struct folio *folio, + struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pmd_t *pmdp) +{ + struct damos_filter *filter; + bool matched; + + if (scheme->core_filters_allowed) + return false; + + damos_for_each_ops_filter(filter, scheme) { + /* + * damos_folio_filter_match checks the young filter by doing an + * rmap on the folio to find its page table. However, being the + * vaddr scheme, we have direct access to the page tables, so + * use that instead. + */ + if (filter->type == DAMOS_FILTER_TYPE_YOUNG) + matched = damos_va_filter_young_match(filter, folio, + vma, addr, ptep, pmdp); + else + matched = damos_folio_filter_match(filter, folio); + + if (matched) + return !filter->allow; + } + return scheme->ops_filters_default_reject; +} + +struct damos_va_migrate_private { + struct list_head *migration_lists; + struct damos *scheme; +}; + +/* + * Place the given folio in the migration_list corresponding to where the folio + * should be migrated. + * + * The algorithm used here is similar to weighted_interleave_nid() + */ +static void damos_va_migrate_dests_add(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr, + struct damos_migrate_dests *dests, + struct list_head *migration_lists) +{ + pgoff_t ilx; + int order; + unsigned int target; + unsigned int weight_total = 0; + int i; + + /* + * If dests is empty, there is only one migration list corresponding + * to s->target_nid. + */ + if (!dests->nr_dests) { + i = 0; + goto isolate; + } + + order = folio_order(folio); + ilx = vma->vm_pgoff >> order; + ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); + + for (i = 0; i < dests->nr_dests; i++) + weight_total += dests->weight_arr[i]; + + /* If the total weights are somehow 0, don't migrate at all */ + if (!weight_total) + return; + + target = ilx % weight_total; + for (i = 0; i < dests->nr_dests; i++) { + if (target < dests->weight_arr[i]) + break; + target -= dests->weight_arr[i]; + } + + /* If the folio is already in the right node, don't do anything */ + if (folio_nid(folio) == dests->node_id_arr[i]) + return; + +isolate: + if (!folio_isolate_lru(folio)) + return; + + list_add(&folio->lru, &migration_lists[i]); +} + +static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct damos_va_migrate_private *priv = walk->private; + struct list_head *migration_lists = priv->migration_lists; + struct damos *s = priv->scheme; + struct damos_migrate_dests *dests = &s->migrate_dests; + struct folio *folio; + spinlock_t *ptl; + pte_t *start_pte, *pte, ptent; + int nr; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + ptl = pmd_trans_huge_lock(pmd, walk->vma); + if (ptl) { + pmd_t pmde = pmdp_get(pmd); + + if (!pmd_present(pmde)) + goto huge_out; + folio = vm_normal_folio_pmd(walk->vma, addr, pmde); + if (!folio) + goto huge_out; + if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd)) + goto huge_out; + damos_va_migrate_dests_add(folio, walk->vma, addr, dests, + migration_lists); +huge_out: + spin_unlock(ptl); + return 0; + } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte) + return 0; + + for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) { + nr = 1; + ptent = ptep_get(pte); + + if (pte_none(ptent) || !pte_present(ptent)) + continue; + folio = vm_normal_folio(walk->vma, addr, ptent); + if (!folio) + continue; + if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL)) + return 0; + damos_va_migrate_dests_add(folio, walk->vma, addr, dests, + migration_lists); + nr = folio_nr_pages(folio); + } + pte_unmap_unlock(start_pte, ptl); + return 0; +} + /* * Functions for the target validity check and cleanup */ -bool damon_va_target_valid(void *target) +static bool damon_va_target_valid(struct damon_target *t) { - struct damon_target *t = target; struct task_struct *task; task = damon_get_task_struct(t); @@ -673,6 +769,11 @@ bool damon_va_target_valid(void *target) return false; } +static void damon_va_cleanup_target(struct damon_target *t) +{ + put_pid(t->pid); +} + #ifndef CONFIG_ADVISE_SYSCALLS static unsigned long damos_madvise(struct damon_target *target, struct damon_region *r, int behavior) @@ -685,7 +786,7 @@ static unsigned long damos_madvise(struct damon_target *target, { struct mm_struct *mm; unsigned long start = PAGE_ALIGN(r->ar.start); - unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start); + unsigned long len = PAGE_ALIGN(damon_sz_region(r)); unsigned long applied; mm = damon_get_mm(target); @@ -699,9 +800,157 @@ static unsigned long damos_madvise(struct damon_target *target, } #endif /* CONFIG_ADVISE_SYSCALLS */ +static unsigned long damos_va_migrate(struct damon_target *target, + struct damon_region *r, struct damos *s, + unsigned long *sz_filter_passed) +{ + LIST_HEAD(folio_list); + struct damos_va_migrate_private priv; + struct mm_struct *mm; + int nr_dests; + int nid; + bool use_target_nid; + unsigned long applied = 0; + struct damos_migrate_dests *dests = &s->migrate_dests; + struct mm_walk_ops walk_ops = { + .pmd_entry = damos_va_migrate_pmd_entry, + .pte_entry = NULL, + .walk_lock = PGWALK_RDLOCK, + }; + + use_target_nid = dests->nr_dests == 0; + nr_dests = use_target_nid ? 1 : dests->nr_dests; + priv.scheme = s; + priv.migration_lists = kmalloc_array(nr_dests, + sizeof(*priv.migration_lists), GFP_KERNEL); + if (!priv.migration_lists) + return 0; + + for (int i = 0; i < nr_dests; i++) + INIT_LIST_HEAD(&priv.migration_lists[i]); + + + mm = damon_get_mm(target); + if (!mm) + goto free_lists; + + mmap_read_lock(mm); + walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv); + mmap_read_unlock(mm); + mmput(mm); + + for (int i = 0; i < nr_dests; i++) { + nid = use_target_nid ? s->target_nid : dests->node_id_arr[i]; + applied += damon_migrate_pages(&priv.migration_lists[i], nid); + cond_resched(); + } + +free_lists: + kfree(priv.migration_lists); + return applied * PAGE_SIZE; +} + +struct damos_va_stat_private { + struct damos *scheme; + unsigned long *sz_filter_passed; +}; + +static inline bool damos_va_invalid_folio(struct folio *folio, + struct damos *s) +{ + return !folio || folio == s->last_applied; +} + +static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct damos_va_stat_private *priv = walk->private; + struct damos *s = priv->scheme; + unsigned long *sz_filter_passed = priv->sz_filter_passed; + struct vm_area_struct *vma = walk->vma; + struct folio *folio; + spinlock_t *ptl; + pte_t *start_pte, *pte, ptent; + int nr; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + ptl = pmd_trans_huge_lock(pmd, vma); + if (ptl) { + pmd_t pmde = pmdp_get(pmd); + + if (!pmd_present(pmde)) + goto huge_unlock; + + folio = vm_normal_folio_pmd(vma, addr, pmde); + + if (damos_va_invalid_folio(folio, s)) + goto huge_unlock; + + if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd)) + *sz_filter_passed += folio_size(folio); + s->last_applied = folio; + +huge_unlock: + spin_unlock(ptl); + return 0; + } +#endif + start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (!start_pte) + return 0; + + for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) { + nr = 1; + ptent = ptep_get(pte); + + if (pte_none(ptent) || !pte_present(ptent)) + continue; + + folio = vm_normal_folio(vma, addr, ptent); + + if (damos_va_invalid_folio(folio, s)) + continue; + + if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL)) + *sz_filter_passed += folio_size(folio); + nr = folio_nr_pages(folio); + s->last_applied = folio; + } + pte_unmap_unlock(start_pte, ptl); + return 0; +} + +static unsigned long damos_va_stat(struct damon_target *target, + struct damon_region *r, struct damos *s, + unsigned long *sz_filter_passed) +{ + struct damos_va_stat_private priv; + struct mm_struct *mm; + struct mm_walk_ops walk_ops = { + .pmd_entry = damos_va_stat_pmd_entry, + .walk_lock = PGWALK_RDLOCK, + }; + + priv.scheme = s; + priv.sz_filter_passed = sz_filter_passed; + + if (!damos_ops_has_filter(s)) + return 0; + + mm = damon_get_mm(target); + if (!mm) + return 0; + + mmap_read_lock(mm); + walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv); + mmap_read_unlock(mm); + mmput(mm); + return 0; +} + static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t, struct damon_region *r, - struct damos *scheme) + struct damos *scheme, unsigned long *sz_filter_passed) { int madv_action; @@ -721,9 +970,15 @@ static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, case DAMOS_NOHUGEPAGE: madv_action = MADV_NOHUGEPAGE; break; + case DAMOS_MIGRATE_HOT: + case DAMOS_MIGRATE_COLD: + return damos_va_migrate(t, r, scheme, sz_filter_passed); case DAMOS_STAT: - return 0; + return damos_va_stat(t, r, scheme, sz_filter_passed); default: + /* + * DAMOS actions that are not yet supported by 'vaddr'. + */ return 0; } @@ -737,7 +992,11 @@ static int damon_va_scheme_score(struct damon_ctx *context, switch (scheme->action) { case DAMOS_PAGEOUT: - return damon_pageout_score(context, r, scheme); + return damon_cold_score(context, r, scheme); + case DAMOS_MIGRATE_HOT: + return damon_hot_score(context, r, scheme); + case DAMOS_MIGRATE_COLD: + return damon_cold_score(context, r, scheme); default: break; } @@ -745,17 +1004,35 @@ static int damon_va_scheme_score(struct damon_ctx *context, return DAMOS_MAX_SCORE; } -void damon_va_set_primitives(struct damon_ctx *ctx) +static int __init damon_va_initcall(void) { - ctx->primitive.init = damon_va_init; - ctx->primitive.update = damon_va_update; - ctx->primitive.prepare_access_checks = damon_va_prepare_access_checks; - ctx->primitive.check_accesses = damon_va_check_accesses; - ctx->primitive.reset_aggregated = NULL; - ctx->primitive.target_valid = damon_va_target_valid; - ctx->primitive.cleanup = NULL; - ctx->primitive.apply_scheme = damon_va_apply_scheme; - ctx->primitive.get_scheme_score = damon_va_scheme_score; -} + struct damon_operations ops = { + .id = DAMON_OPS_VADDR, + .init = damon_va_init, + .update = damon_va_update, + .prepare_access_checks = damon_va_prepare_access_checks, + .check_accesses = damon_va_check_accesses, + .target_valid = damon_va_target_valid, + .cleanup_target = damon_va_cleanup_target, + .cleanup = NULL, + .apply_scheme = damon_va_apply_scheme, + .get_scheme_score = damon_va_scheme_score, + }; + /* ops for fixed virtual address ranges */ + struct damon_operations ops_fvaddr = ops; + int err; + + /* Don't set the monitoring target regions for the entire mapping */ + ops_fvaddr.id = DAMON_OPS_FVADDR; + ops_fvaddr.init = NULL; + ops_fvaddr.update = NULL; + + err = damon_register_ops(&ops); + if (err) + return err; + return damon_register_ops(&ops_fvaddr); +}; + +subsys_initcall(damon_va_initcall); -#include "vaddr-test.h" +#include "tests/vaddr-kunit.h" |
