diff options
Diffstat (limited to 'mm/damon')
| -rw-r--r-- | mm/damon/Kconfig | 113 | ||||
| -rw-r--r-- | mm/damon/Makefile | 9 | ||||
| -rw-r--r-- | mm/damon/core.c | 2941 | ||||
| -rw-r--r-- | mm/damon/lru_sort.c | 394 | ||||
| -rw-r--r-- | mm/damon/modules-common.c | 42 | ||||
| -rw-r--r-- | mm/damon/modules-common.h | 49 | ||||
| -rw-r--r-- | mm/damon/ops-common.c | 429 | ||||
| -rw-r--r-- | mm/damon/ops-common.h | 25 | ||||
| -rw-r--r-- | mm/damon/paddr.c | 386 | ||||
| -rw-r--r-- | mm/damon/reclaim.c | 398 | ||||
| -rw-r--r-- | mm/damon/stat.c | 276 | ||||
| -rw-r--r-- | mm/damon/sysfs-common.c | 107 | ||||
| -rw-r--r-- | mm/damon/sysfs-common.h | 61 | ||||
| -rw-r--r-- | mm/damon/sysfs-schemes.c | 2832 | ||||
| -rw-r--r-- | mm/damon/sysfs.c | 2108 | ||||
| -rw-r--r-- | mm/damon/tests/.kunitconfig | 15 | ||||
| -rw-r--r-- | mm/damon/tests/core-kunit.h | 1249 | ||||
| -rw-r--r-- | mm/damon/tests/sysfs-kunit.h | 112 | ||||
| -rw-r--r-- | mm/damon/tests/vaddr-kunit.h | 348 | ||||
| -rw-r--r-- | mm/damon/vaddr.c | 1038 |
20 files changed, 12932 insertions, 0 deletions
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig new file mode 100644 index 000000000000..8c868f7035fc --- /dev/null +++ b/mm/damon/Kconfig @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menu "Data Access Monitoring" + +config DAMON + bool "DAMON: Data Access Monitoring Framework" + help + This builds a framework that allows kernel subsystems to monitor + access frequency of each memory region. The information can be useful + for performance-centric DRAM level memory management. + + See https://www.kernel.org/doc/html/latest/mm/damon/index.html for + more information. + +config DAMON_KUNIT_TEST + bool "Test for damon" if !KUNIT_ALL_TESTS + depends on DAMON && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_VADDR + bool "Data access monitoring operations for virtual address spaces" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + default DAMON + help + This builds the default data access monitoring operations for DAMON + that work for virtual address spaces. + +config DAMON_PADDR + bool "Data access monitoring operations for the physical address space" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + default DAMON + help + This builds the default data access monitoring operations for DAMON + that works for the physical address space. + +config DAMON_VADDR_KUNIT_TEST + bool "Test for DAMON operations" if !KUNIT_ALL_TESTS + depends on DAMON_VADDR && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON virtual addresses operations Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_SYSFS + bool "DAMON sysfs interface" + depends on DAMON && SYSFS + default DAMON + help + This builds the sysfs interface for DAMON. The user space can use + the interface for arbitrary data access monitoring. + +config DAMON_SYSFS_KUNIT_TEST + bool "Test for damon sysfs interface" if !KUNIT_ALL_TESTS + depends on DAMON_SYSFS && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON sysfs interface Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_RECLAIM + bool "Build DAMON-based reclaim (DAMON_RECLAIM)" + depends on DAMON_PADDR + help + This builds the DAMON-based reclamation subsystem. It finds pages + that not accessed for a long time (cold) using DAMON and reclaim + those. + + This is suggested to be used as a proactive and lightweight + reclamation under light memory pressure, while the traditional page + scanning-based reclamation is used for heavy pressure. + +config DAMON_LRU_SORT + bool "Build DAMON-based LRU-lists sorting (DAMON_LRU_SORT)" + depends on DAMON_PADDR + help + This builds the DAMON-based LRU-lists sorting subsystem. It tries to + protect frequently accessed (hot) pages while rarely accessed (cold) + pages reclaimed first under memory pressure. + +config DAMON_STAT + bool "Build data access monitoring stat (DAMON_STAT)" + depends on DAMON_PADDR + help + This builds the DAMON-based access monitoring statistics subsystem. + It runs DAMON and expose access monitoring results in simple stat + metrics. + +config DAMON_STAT_ENABLED_DEFAULT + bool "Enable DAMON_STAT by default" + depends on DAMON_STAT + default DAMON_STAT + help + Whether to enable DAMON_STAT by default. Users can disable it in + boot or runtime using its 'enabled' parameter. + +endmenu diff --git a/mm/damon/Makefile b/mm/damon/Makefile new file mode 100644 index 000000000000..d8d6bf5f8bff --- /dev/null +++ b/mm/damon/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := core.o +obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o +obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o +obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs-schemes.o sysfs.o +obj-$(CONFIG_DAMON_RECLAIM) += modules-common.o reclaim.o +obj-$(CONFIG_DAMON_LRU_SORT) += modules-common.o lru_sort.o +obj-$(CONFIG_DAMON_STAT) += modules-common.o stat.o diff --git a/mm/damon/core.c b/mm/damon/core.c new file mode 100644 index 000000000000..f9fc0375890a --- /dev/null +++ b/mm/damon/core.c @@ -0,0 +1,2941 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Data Access Monitor + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#define pr_fmt(fmt) "damon: " fmt + +#include <linux/damon.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/memcontrol.h> +#include <linux/mm.h> +#include <linux/psi.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/string_choices.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/damon.h> + +static DEFINE_MUTEX(damon_lock); +static int nr_running_ctxs; +static bool running_exclusive_ctxs; + +static DEFINE_MUTEX(damon_ops_lock); +static struct damon_operations damon_registered_ops[NR_DAMON_OPS]; + +static struct kmem_cache *damon_region_cache __ro_after_init; + +/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */ +static bool __damon_is_registered_ops(enum damon_ops_id id) +{ + struct damon_operations empty_ops = {}; + + if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops))) + return false; + return true; +} + +/** + * damon_is_registered_ops() - Check if a given damon_operations is registered. + * @id: Id of the damon_operations to check if registered. + * + * Return: true if the ops is set, false otherwise. + */ +bool damon_is_registered_ops(enum damon_ops_id id) +{ + bool registered; + + if (id >= NR_DAMON_OPS) + return false; + mutex_lock(&damon_ops_lock); + registered = __damon_is_registered_ops(id); + mutex_unlock(&damon_ops_lock); + return registered; +} + +/** + * damon_register_ops() - Register a monitoring operations set to DAMON. + * @ops: monitoring operations set to register. + * + * This function registers a monitoring operations set of valid &struct + * damon_operations->id so that others can find and use them later. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_register_ops(struct damon_operations *ops) +{ + int err = 0; + + if (ops->id >= NR_DAMON_OPS) + return -EINVAL; + + mutex_lock(&damon_ops_lock); + /* Fail for already registered ops */ + if (__damon_is_registered_ops(ops->id)) + err = -EINVAL; + else + damon_registered_ops[ops->id] = *ops; + mutex_unlock(&damon_ops_lock); + return err; +} + +/** + * damon_select_ops() - Select a monitoring operations to use with the context. + * @ctx: monitoring context to use the operations. + * @id: id of the registered monitoring operations to select. + * + * This function finds registered monitoring operations set of @id and make + * @ctx to use it. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id) +{ + int err = 0; + + if (id >= NR_DAMON_OPS) + return -EINVAL; + + mutex_lock(&damon_ops_lock); + if (!__damon_is_registered_ops(id)) + err = -EINVAL; + else + ctx->ops = damon_registered_ops[id]; + mutex_unlock(&damon_ops_lock); + return err; +} + +/* + * Construct a damon_region struct + * + * Returns the pointer to the new struct if success, or NULL otherwise + */ +struct damon_region *damon_new_region(unsigned long start, unsigned long end) +{ + struct damon_region *region; + + region = kmem_cache_alloc(damon_region_cache, GFP_KERNEL); + if (!region) + return NULL; + + region->ar.start = start; + region->ar.end = end; + region->nr_accesses = 0; + region->nr_accesses_bp = 0; + INIT_LIST_HEAD(®ion->list); + + region->age = 0; + region->last_nr_accesses = 0; + + return region; +} + +void damon_add_region(struct damon_region *r, struct damon_target *t) +{ + list_add_tail(&r->list, &t->regions_list); + t->nr_regions++; +} + +static void damon_del_region(struct damon_region *r, struct damon_target *t) +{ + list_del(&r->list); + t->nr_regions--; +} + +static void damon_free_region(struct damon_region *r) +{ + kmem_cache_free(damon_region_cache, r); +} + +void damon_destroy_region(struct damon_region *r, struct damon_target *t) +{ + damon_del_region(r, t); + damon_free_region(r); +} + +/* + * Check whether a region is intersecting an address range + * + * Returns true if it is. + */ +static bool damon_intersect(struct damon_region *r, + struct damon_addr_range *re) +{ + return !(r->ar.end <= re->start || re->end <= r->ar.start); +} + +/* + * Fill holes in regions with new regions. + */ +static int damon_fill_regions_holes(struct damon_region *first, + struct damon_region *last, struct damon_target *t) +{ + struct damon_region *r = first; + + damon_for_each_region_from(r, t) { + struct damon_region *next, *newr; + + if (r == last) + break; + next = damon_next_region(r); + if (r->ar.end != next->ar.start) { + newr = damon_new_region(r->ar.end, next->ar.start); + if (!newr) + return -ENOMEM; + damon_insert_region(newr, r, next, t); + } + } + return 0; +} + +/* + * damon_set_regions() - Set regions of a target for given address ranges. + * @t: the given target. + * @ranges: array of new monitoring target ranges. + * @nr_ranges: length of @ranges. + * @min_sz_region: minimum region size. + * + * This function adds new regions to, or modify existing regions of a + * monitoring target to fit in specific ranges. + * + * Return: 0 if success, or negative error code otherwise. + */ +int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, + unsigned int nr_ranges, unsigned long min_sz_region) +{ + struct damon_region *r, *next; + unsigned int i; + int err; + + /* Remove regions which are not in the new ranges */ + damon_for_each_region_safe(r, next, t) { + for (i = 0; i < nr_ranges; i++) { + if (damon_intersect(r, &ranges[i])) + break; + } + if (i == nr_ranges) + damon_destroy_region(r, t); + } + + r = damon_first_region(t); + /* Add new regions or resize existing regions to fit in the ranges */ + for (i = 0; i < nr_ranges; i++) { + struct damon_region *first = NULL, *last, *newr; + struct damon_addr_range *range; + + range = &ranges[i]; + /* Get the first/last regions intersecting with the range */ + damon_for_each_region_from(r, t) { + if (damon_intersect(r, range)) { + if (!first) + first = r; + last = r; + } + if (r->ar.start >= range->end) + break; + } + if (!first) { + /* no region intersects with this range */ + newr = damon_new_region( + ALIGN_DOWN(range->start, + min_sz_region), + ALIGN(range->end, min_sz_region)); + if (!newr) + return -ENOMEM; + damon_insert_region(newr, damon_prev_region(r), r, t); + } else { + /* resize intersecting regions to fit in this range */ + first->ar.start = ALIGN_DOWN(range->start, + min_sz_region); + last->ar.end = ALIGN(range->end, min_sz_region); + + /* fill possible holes in the range */ + err = damon_fill_regions_holes(first, last, t); + if (err) + return err; + } + } + return 0; +} + +struct damos_filter *damos_new_filter(enum damos_filter_type type, + bool matching, bool allow) +{ + struct damos_filter *filter; + + filter = kmalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return NULL; + filter->type = type; + filter->matching = matching; + filter->allow = allow; + INIT_LIST_HEAD(&filter->list); + return filter; +} + +/** + * damos_filter_for_ops() - Return if the filter is ops-hndled one. + * @type: type of the filter. + * + * Return: true if the filter of @type needs to be handled by ops layer, false + * otherwise. + */ +bool damos_filter_for_ops(enum damos_filter_type type) +{ + switch (type) { + case DAMOS_FILTER_TYPE_ADDR: + case DAMOS_FILTER_TYPE_TARGET: + return false; + default: + break; + } + return true; +} + +void damos_add_filter(struct damos *s, struct damos_filter *f) +{ + if (damos_filter_for_ops(f->type)) + list_add_tail(&f->list, &s->ops_filters); + else + list_add_tail(&f->list, &s->core_filters); +} + +static void damos_del_filter(struct damos_filter *f) +{ + list_del(&f->list); +} + +static void damos_free_filter(struct damos_filter *f) +{ + kfree(f); +} + +void damos_destroy_filter(struct damos_filter *f) +{ + damos_del_filter(f); + damos_free_filter(f); +} + +struct damos_quota_goal *damos_new_quota_goal( + enum damos_quota_goal_metric metric, + unsigned long target_value) +{ + struct damos_quota_goal *goal; + + goal = kmalloc(sizeof(*goal), GFP_KERNEL); + if (!goal) + return NULL; + goal->metric = metric; + goal->target_value = target_value; + INIT_LIST_HEAD(&goal->list); + return goal; +} + +void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g) +{ + list_add_tail(&g->list, &q->goals); +} + +static void damos_del_quota_goal(struct damos_quota_goal *g) +{ + list_del(&g->list); +} + +static void damos_free_quota_goal(struct damos_quota_goal *g) +{ + kfree(g); +} + +void damos_destroy_quota_goal(struct damos_quota_goal *g) +{ + damos_del_quota_goal(g); + damos_free_quota_goal(g); +} + +/* initialize fields of @quota that normally API users wouldn't set */ +static struct damos_quota *damos_quota_init(struct damos_quota *quota) +{ + quota->esz = 0; + quota->total_charged_sz = 0; + quota->total_charged_ns = 0; + quota->charged_sz = 0; + quota->charged_from = 0; + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + quota->esz_bp = 0; + return quota; +} + +struct damos *damon_new_scheme(struct damos_access_pattern *pattern, + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, + struct damos_watermarks *wmarks, + int target_nid) +{ + struct damos *scheme; + + scheme = kmalloc(sizeof(*scheme), GFP_KERNEL); + if (!scheme) + return NULL; + scheme->pattern = *pattern; + scheme->action = action; + scheme->apply_interval_us = apply_interval_us; + /* + * next_apply_sis will be set when kdamond starts. While kdamond is + * running, it will also updated when it is added to the DAMON context, + * or damon_attrs are updated. + */ + scheme->next_apply_sis = 0; + scheme->walk_completed = false; + INIT_LIST_HEAD(&scheme->core_filters); + INIT_LIST_HEAD(&scheme->ops_filters); + scheme->stat = (struct damos_stat){}; + INIT_LIST_HEAD(&scheme->list); + + scheme->quota = *(damos_quota_init(quota)); + /* quota.goals should be separately set by caller */ + INIT_LIST_HEAD(&scheme->quota.goals); + + scheme->wmarks = *wmarks; + scheme->wmarks.activated = true; + + scheme->migrate_dests = (struct damos_migrate_dests){}; + scheme->target_nid = target_nid; + + return scheme; +} + +static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + unsigned long apply_interval = s->apply_interval_us ? + s->apply_interval_us : ctx->attrs.aggr_interval; + + s->next_apply_sis = ctx->passed_sample_intervals + + apply_interval / sample_interval; +} + +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) +{ + list_add_tail(&s->list, &ctx->schemes); + damos_set_next_apply_sis(s, ctx); +} + +static void damon_del_scheme(struct damos *s) +{ + list_del(&s->list); +} + +static void damon_free_scheme(struct damos *s) +{ + kfree(s); +} + +void damon_destroy_scheme(struct damos *s) +{ + struct damos_quota_goal *g, *g_next; + struct damos_filter *f, *next; + + damos_for_each_quota_goal_safe(g, g_next, &s->quota) + damos_destroy_quota_goal(g); + + damos_for_each_core_filter_safe(f, next, s) + damos_destroy_filter(f); + + damos_for_each_ops_filter_safe(f, next, s) + damos_destroy_filter(f); + + kfree(s->migrate_dests.node_id_arr); + kfree(s->migrate_dests.weight_arr); + damon_del_scheme(s); + damon_free_scheme(s); +} + +/* + * Construct a damon_target struct + * + * Returns the pointer to the new struct if success, or NULL otherwise + */ +struct damon_target *damon_new_target(void) +{ + struct damon_target *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return NULL; + + t->pid = NULL; + t->nr_regions = 0; + INIT_LIST_HEAD(&t->regions_list); + INIT_LIST_HEAD(&t->list); + t->obsolete = false; + + return t; +} + +void damon_add_target(struct damon_ctx *ctx, struct damon_target *t) +{ + list_add_tail(&t->list, &ctx->adaptive_targets); +} + +bool damon_targets_empty(struct damon_ctx *ctx) +{ + return list_empty(&ctx->adaptive_targets); +} + +static void damon_del_target(struct damon_target *t) +{ + list_del(&t->list); +} + +void damon_free_target(struct damon_target *t) +{ + struct damon_region *r, *next; + + damon_for_each_region_safe(r, next, t) + damon_free_region(r); + kfree(t); +} + +void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx) +{ + + if (ctx && ctx->ops.cleanup_target) + ctx->ops.cleanup_target(t); + + damon_del_target(t); + damon_free_target(t); +} + +unsigned int damon_nr_regions(struct damon_target *t) +{ + return t->nr_regions; +} + +struct damon_ctx *damon_new_ctx(void) +{ + struct damon_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + init_completion(&ctx->kdamond_started); + + ctx->attrs.sample_interval = 5 * 1000; + ctx->attrs.aggr_interval = 100 * 1000; + ctx->attrs.ops_update_interval = 60 * 1000 * 1000; + + ctx->passed_sample_intervals = 0; + /* These will be set from kdamond_init_ctx() */ + ctx->next_aggregation_sis = 0; + ctx->next_ops_update_sis = 0; + + mutex_init(&ctx->kdamond_lock); + INIT_LIST_HEAD(&ctx->call_controls); + mutex_init(&ctx->call_controls_lock); + mutex_init(&ctx->walk_control_lock); + + ctx->attrs.min_nr_regions = 10; + ctx->attrs.max_nr_regions = 1000; + + ctx->addr_unit = 1; + ctx->min_sz_region = DAMON_MIN_REGION; + + INIT_LIST_HEAD(&ctx->adaptive_targets); + INIT_LIST_HEAD(&ctx->schemes); + + return ctx; +} + +static void damon_destroy_targets(struct damon_ctx *ctx) +{ + struct damon_target *t, *next_t; + + damon_for_each_target_safe(t, next_t, ctx) + damon_destroy_target(t, ctx); +} + +void damon_destroy_ctx(struct damon_ctx *ctx) +{ + struct damos *s, *next_s; + + damon_destroy_targets(ctx); + + damon_for_each_scheme_safe(s, next_s, ctx) + damon_destroy_scheme(s); + + kfree(ctx); +} + +static bool damon_attrs_equals(const struct damon_attrs *attrs1, + const struct damon_attrs *attrs2) +{ + const struct damon_intervals_goal *ig1 = &attrs1->intervals_goal; + const struct damon_intervals_goal *ig2 = &attrs2->intervals_goal; + + return attrs1->sample_interval == attrs2->sample_interval && + attrs1->aggr_interval == attrs2->aggr_interval && + attrs1->ops_update_interval == attrs2->ops_update_interval && + attrs1->min_nr_regions == attrs2->min_nr_regions && + attrs1->max_nr_regions == attrs2->max_nr_regions && + ig1->access_bp == ig2->access_bp && + ig1->aggrs == ig2->aggrs && + ig1->min_sample_us == ig2->min_sample_us && + ig1->max_sample_us == ig2->max_sample_us; +} + +static unsigned int damon_age_for_new_attrs(unsigned int age, + struct damon_attrs *old_attrs, struct damon_attrs *new_attrs) +{ + return age * old_attrs->aggr_interval / new_attrs->aggr_interval; +} + +/* convert access ratio in bp (per 10,000) to nr_accesses */ +static unsigned int damon_accesses_bp_to_nr_accesses( + unsigned int accesses_bp, struct damon_attrs *attrs) +{ + return accesses_bp * damon_max_nr_accesses(attrs) / 10000; +} + +/* + * Convert nr_accesses to access ratio in bp (per 10,000). + * + * Callers should ensure attrs.aggr_interval is not zero, like + * damon_update_monitoring_results() does . Otherwise, divide-by-zero would + * happen. + */ +static unsigned int damon_nr_accesses_to_accesses_bp( + unsigned int nr_accesses, struct damon_attrs *attrs) +{ + return nr_accesses * 10000 / damon_max_nr_accesses(attrs); +} + +static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses, + struct damon_attrs *old_attrs, struct damon_attrs *new_attrs) +{ + return damon_accesses_bp_to_nr_accesses( + damon_nr_accesses_to_accesses_bp( + nr_accesses, old_attrs), + new_attrs); +} + +static void damon_update_monitoring_result(struct damon_region *r, + struct damon_attrs *old_attrs, struct damon_attrs *new_attrs, + bool aggregating) +{ + if (!aggregating) { + r->nr_accesses = damon_nr_accesses_for_new_attrs( + r->nr_accesses, old_attrs, new_attrs); + r->nr_accesses_bp = r->nr_accesses * 10000; + } else { + /* + * if this is called in the middle of the aggregation, reset + * the aggregations we made so far for this aggregation + * interval. In other words, make the status like + * kdamond_reset_aggregated() is called. + */ + r->last_nr_accesses = damon_nr_accesses_for_new_attrs( + r->last_nr_accesses, old_attrs, new_attrs); + r->nr_accesses_bp = r->last_nr_accesses * 10000; + r->nr_accesses = 0; + } + r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs); +} + +/* + * region->nr_accesses is the number of sampling intervals in the last + * aggregation interval that access to the region has found, and region->age is + * the number of aggregation intervals that its access pattern has maintained. + * For the reason, the real meaning of the two fields depend on current + * sampling interval and aggregation interval. This function updates + * ->nr_accesses and ->age of given damon_ctx's regions for new damon_attrs. + */ +static void damon_update_monitoring_results(struct damon_ctx *ctx, + struct damon_attrs *new_attrs, bool aggregating) +{ + struct damon_attrs *old_attrs = &ctx->attrs; + struct damon_target *t; + struct damon_region *r; + + /* if any interval is zero, simply forgive conversion */ + if (!old_attrs->sample_interval || !old_attrs->aggr_interval || + !new_attrs->sample_interval || + !new_attrs->aggr_interval) + return; + + damon_for_each_target(t, ctx) + damon_for_each_region(r, t) + damon_update_monitoring_result( + r, old_attrs, new_attrs, aggregating); +} + +/* + * damon_valid_intervals_goal() - return if the intervals goal of @attrs is + * valid. + */ +static bool damon_valid_intervals_goal(struct damon_attrs *attrs) +{ + struct damon_intervals_goal *goal = &attrs->intervals_goal; + + /* tuning is disabled */ + if (!goal->aggrs) + return true; + if (goal->min_sample_us > goal->max_sample_us) + return false; + if (attrs->sample_interval < goal->min_sample_us || + goal->max_sample_us < attrs->sample_interval) + return false; + return true; +} + +/** + * damon_set_attrs() - Set attributes for the monitoring. + * @ctx: monitoring context + * @attrs: monitoring attributes + * + * This function should be called while the kdamond is not running, an access + * check results aggregation is not ongoing (e.g., from damon_call(). + * + * Every time interval is in micro-seconds. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) +{ + unsigned long sample_interval = attrs->sample_interval ? + attrs->sample_interval : 1; + struct damos *s; + bool aggregating = ctx->passed_sample_intervals < + ctx->next_aggregation_sis; + + if (!damon_valid_intervals_goal(attrs)) + return -EINVAL; + + if (attrs->min_nr_regions < 3) + return -EINVAL; + if (attrs->min_nr_regions > attrs->max_nr_regions) + return -EINVAL; + if (attrs->sample_interval > attrs->aggr_interval) + return -EINVAL; + + /* calls from core-external doesn't set this. */ + if (!attrs->aggr_samples) + attrs->aggr_samples = attrs->aggr_interval / sample_interval; + + ctx->next_aggregation_sis = ctx->passed_sample_intervals + + attrs->aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->passed_sample_intervals + + attrs->ops_update_interval / sample_interval; + + damon_update_monitoring_results(ctx, attrs, aggregating); + ctx->attrs = *attrs; + + damon_for_each_scheme(s, ctx) + damos_set_next_apply_sis(s, ctx); + + return 0; +} + +/** + * damon_set_schemes() - Set data access monitoring based operation schemes. + * @ctx: monitoring context + * @schemes: array of the schemes + * @nr_schemes: number of entries in @schemes + * + * This function should not be called while the kdamond of the context is + * running. + */ +void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, + ssize_t nr_schemes) +{ + struct damos *s, *next; + ssize_t i; + + damon_for_each_scheme_safe(s, next, ctx) + damon_destroy_scheme(s); + for (i = 0; i < nr_schemes; i++) + damon_add_scheme(ctx, schemes[i]); +} + +static struct damos_quota_goal *damos_nth_quota_goal( + int n, struct damos_quota *q) +{ + struct damos_quota_goal *goal; + int i = 0; + + damos_for_each_quota_goal(goal, q) { + if (i++ == n) + return goal; + } + return NULL; +} + +static void damos_commit_quota_goal_union( + struct damos_quota_goal *dst, struct damos_quota_goal *src) +{ + switch (dst->metric) { + case DAMOS_QUOTA_NODE_MEM_USED_BP: + case DAMOS_QUOTA_NODE_MEM_FREE_BP: + dst->nid = src->nid; + break; + case DAMOS_QUOTA_NODE_MEMCG_USED_BP: + case DAMOS_QUOTA_NODE_MEMCG_FREE_BP: + dst->nid = src->nid; + dst->memcg_id = src->memcg_id; + break; + default: + break; + } +} + +static void damos_commit_quota_goal( + struct damos_quota_goal *dst, struct damos_quota_goal *src) +{ + dst->metric = src->metric; + dst->target_value = src->target_value; + if (dst->metric == DAMOS_QUOTA_USER_INPUT) + dst->current_value = src->current_value; + /* keep last_psi_total as is, since it will be updated in next cycle */ + damos_commit_quota_goal_union(dst, src); +} + +/** + * damos_commit_quota_goals() - Commit DAMOS quota goals to another quota. + * @dst: The commit destination DAMOS quota. + * @src: The commit source DAMOS quota. + * + * Copies user-specified parameters for quota goals from @src to @dst. Users + * should use this function for quota goals-level parameters update of running + * DAMON contexts, instead of manual in-place updates. + * + * This function should be called from parameters-update safe context, like + * damon_call(). + */ +int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src) +{ + struct damos_quota_goal *dst_goal, *next, *src_goal, *new_goal; + int i = 0, j = 0; + + damos_for_each_quota_goal_safe(dst_goal, next, dst) { + src_goal = damos_nth_quota_goal(i++, src); + if (src_goal) + damos_commit_quota_goal(dst_goal, src_goal); + else + damos_destroy_quota_goal(dst_goal); + } + damos_for_each_quota_goal_safe(src_goal, next, src) { + if (j++ < i) + continue; + new_goal = damos_new_quota_goal( + src_goal->metric, src_goal->target_value); + if (!new_goal) + return -ENOMEM; + damos_commit_quota_goal(new_goal, src_goal); + damos_add_quota_goal(dst, new_goal); + } + return 0; +} + +static int damos_commit_quota(struct damos_quota *dst, struct damos_quota *src) +{ + int err; + + dst->reset_interval = src->reset_interval; + dst->ms = src->ms; + dst->sz = src->sz; + err = damos_commit_quota_goals(dst, src); + if (err) + return err; + dst->weight_sz = src->weight_sz; + dst->weight_nr_accesses = src->weight_nr_accesses; + dst->weight_age = src->weight_age; + return 0; +} + +static struct damos_filter *damos_nth_core_filter(int n, struct damos *s) +{ + struct damos_filter *filter; + int i = 0; + + damos_for_each_core_filter(filter, s) { + if (i++ == n) + return filter; + } + return NULL; +} + +static struct damos_filter *damos_nth_ops_filter(int n, struct damos *s) +{ + struct damos_filter *filter; + int i = 0; + + damos_for_each_ops_filter(filter, s) { + if (i++ == n) + return filter; + } + return NULL; +} + +static void damos_commit_filter_arg( + struct damos_filter *dst, struct damos_filter *src) +{ + switch (dst->type) { + case DAMOS_FILTER_TYPE_MEMCG: + dst->memcg_id = src->memcg_id; + break; + case DAMOS_FILTER_TYPE_ADDR: + dst->addr_range = src->addr_range; + break; + case DAMOS_FILTER_TYPE_TARGET: + dst->target_idx = src->target_idx; + break; + case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: + dst->sz_range = src->sz_range; + break; + default: + break; + } +} + +static void damos_commit_filter( + struct damos_filter *dst, struct damos_filter *src) +{ + dst->type = src->type; + dst->matching = src->matching; + dst->allow = src->allow; + damos_commit_filter_arg(dst, src); +} + +static int damos_commit_core_filters(struct damos *dst, struct damos *src) +{ + struct damos_filter *dst_filter, *next, *src_filter, *new_filter; + int i = 0, j = 0; + + damos_for_each_core_filter_safe(dst_filter, next, dst) { + src_filter = damos_nth_core_filter(i++, src); + if (src_filter) + damos_commit_filter(dst_filter, src_filter); + else + damos_destroy_filter(dst_filter); + } + + damos_for_each_core_filter_safe(src_filter, next, src) { + if (j++ < i) + continue; + + new_filter = damos_new_filter( + src_filter->type, src_filter->matching, + src_filter->allow); + if (!new_filter) + return -ENOMEM; + damos_commit_filter_arg(new_filter, src_filter); + damos_add_filter(dst, new_filter); + } + return 0; +} + +static int damos_commit_ops_filters(struct damos *dst, struct damos *src) +{ + struct damos_filter *dst_filter, *next, *src_filter, *new_filter; + int i = 0, j = 0; + + damos_for_each_ops_filter_safe(dst_filter, next, dst) { + src_filter = damos_nth_ops_filter(i++, src); + if (src_filter) + damos_commit_filter(dst_filter, src_filter); + else + damos_destroy_filter(dst_filter); + } + + damos_for_each_ops_filter_safe(src_filter, next, src) { + if (j++ < i) + continue; + + new_filter = damos_new_filter( + src_filter->type, src_filter->matching, + src_filter->allow); + if (!new_filter) + return -ENOMEM; + damos_commit_filter_arg(new_filter, src_filter); + damos_add_filter(dst, new_filter); + } + return 0; +} + +/** + * damos_filters_default_reject() - decide whether to reject memory that didn't + * match with any given filter. + * @filters: Given DAMOS filters of a group. + */ +static bool damos_filters_default_reject(struct list_head *filters) +{ + struct damos_filter *last_filter; + + if (list_empty(filters)) + return false; + last_filter = list_last_entry(filters, struct damos_filter, list); + return last_filter->allow; +} + +static void damos_set_filters_default_reject(struct damos *s) +{ + if (!list_empty(&s->ops_filters)) + s->core_filters_default_reject = false; + else + s->core_filters_default_reject = + damos_filters_default_reject(&s->core_filters); + s->ops_filters_default_reject = + damos_filters_default_reject(&s->ops_filters); +} + +static int damos_commit_dests(struct damos_migrate_dests *dst, + struct damos_migrate_dests *src) +{ + if (dst->nr_dests != src->nr_dests) { + kfree(dst->node_id_arr); + kfree(dst->weight_arr); + + dst->node_id_arr = kmalloc_array(src->nr_dests, + sizeof(*dst->node_id_arr), GFP_KERNEL); + if (!dst->node_id_arr) { + dst->weight_arr = NULL; + return -ENOMEM; + } + + dst->weight_arr = kmalloc_array(src->nr_dests, + sizeof(*dst->weight_arr), GFP_KERNEL); + if (!dst->weight_arr) { + /* ->node_id_arr will be freed by scheme destruction */ + return -ENOMEM; + } + } + + dst->nr_dests = src->nr_dests; + for (int i = 0; i < src->nr_dests; i++) { + dst->node_id_arr[i] = src->node_id_arr[i]; + dst->weight_arr[i] = src->weight_arr[i]; + } + + return 0; +} + +static int damos_commit_filters(struct damos *dst, struct damos *src) +{ + int err; + + err = damos_commit_core_filters(dst, src); + if (err) + return err; + err = damos_commit_ops_filters(dst, src); + if (err) + return err; + damos_set_filters_default_reject(dst); + return 0; +} + +static struct damos *damon_nth_scheme(int n, struct damon_ctx *ctx) +{ + struct damos *s; + int i = 0; + + damon_for_each_scheme(s, ctx) { + if (i++ == n) + return s; + } + return NULL; +} + +static int damos_commit(struct damos *dst, struct damos *src) +{ + int err; + + dst->pattern = src->pattern; + dst->action = src->action; + dst->apply_interval_us = src->apply_interval_us; + + err = damos_commit_quota(&dst->quota, &src->quota); + if (err) + return err; + + dst->wmarks = src->wmarks; + dst->target_nid = src->target_nid; + + err = damos_commit_dests(&dst->migrate_dests, &src->migrate_dests); + if (err) + return err; + + err = damos_commit_filters(dst, src); + return err; +} + +static int damon_commit_schemes(struct damon_ctx *dst, struct damon_ctx *src) +{ + struct damos *dst_scheme, *next, *src_scheme, *new_scheme; + int i = 0, j = 0, err; + + damon_for_each_scheme_safe(dst_scheme, next, dst) { + src_scheme = damon_nth_scheme(i++, src); + if (src_scheme) { + err = damos_commit(dst_scheme, src_scheme); + if (err) + return err; + } else { + damon_destroy_scheme(dst_scheme); + } + } + + damon_for_each_scheme_safe(src_scheme, next, src) { + if (j++ < i) + continue; + new_scheme = damon_new_scheme(&src_scheme->pattern, + src_scheme->action, + src_scheme->apply_interval_us, + &src_scheme->quota, &src_scheme->wmarks, + NUMA_NO_NODE); + if (!new_scheme) + return -ENOMEM; + err = damos_commit(new_scheme, src_scheme); + if (err) { + damon_destroy_scheme(new_scheme); + return err; + } + damon_add_scheme(dst, new_scheme); + } + return 0; +} + +static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx) +{ + struct damon_target *t; + int i = 0; + + damon_for_each_target(t, ctx) { + if (i++ == n) + return t; + } + return NULL; +} + +/* + * The caller should ensure the regions of @src are + * 1. valid (end >= src) and + * 2. sorted by starting address. + * + * If @src has no region, @dst keeps current regions. + */ +static int damon_commit_target_regions(struct damon_target *dst, + struct damon_target *src, unsigned long src_min_sz_region) +{ + struct damon_region *src_region; + struct damon_addr_range *ranges; + int i = 0, err; + + damon_for_each_region(src_region, src) + i++; + if (!i) + return 0; + + ranges = kmalloc_array(i, sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN); + if (!ranges) + return -ENOMEM; + i = 0; + damon_for_each_region(src_region, src) + ranges[i++] = src_region->ar; + err = damon_set_regions(dst, ranges, i, src_min_sz_region); + kfree(ranges); + return err; +} + +static int damon_commit_target( + struct damon_target *dst, bool dst_has_pid, + struct damon_target *src, bool src_has_pid, + unsigned long src_min_sz_region) +{ + int err; + + err = damon_commit_target_regions(dst, src, src_min_sz_region); + if (err) + return err; + if (dst_has_pid) + put_pid(dst->pid); + if (src_has_pid) + get_pid(src->pid); + dst->pid = src->pid; + return 0; +} + +static int damon_commit_targets( + struct damon_ctx *dst, struct damon_ctx *src) +{ + struct damon_target *dst_target, *next, *src_target, *new_target; + int i = 0, j = 0, err; + + damon_for_each_target_safe(dst_target, next, dst) { + src_target = damon_nth_target(i++, src); + /* + * If src target is obsolete, do not commit the parameters to + * the dst target, and further remove the dst target. + */ + if (src_target && !src_target->obsolete) { + err = damon_commit_target( + dst_target, damon_target_has_pid(dst), + src_target, damon_target_has_pid(src), + src->min_sz_region); + if (err) + return err; + } else { + struct damos *s; + + damon_destroy_target(dst_target, dst); + damon_for_each_scheme(s, dst) { + if (s->quota.charge_target_from == dst_target) { + s->quota.charge_target_from = NULL; + s->quota.charge_addr_from = 0; + } + } + } + } + + damon_for_each_target_safe(src_target, next, src) { + if (j++ < i) + continue; + /* target to remove has no matching dst */ + if (src_target->obsolete) + return -EINVAL; + new_target = damon_new_target(); + if (!new_target) + return -ENOMEM; + err = damon_commit_target(new_target, false, + src_target, damon_target_has_pid(src), + src->min_sz_region); + if (err) { + damon_destroy_target(new_target, NULL); + return err; + } + damon_add_target(dst, new_target); + } + return 0; +} + +/** + * damon_commit_ctx() - Commit parameters of a DAMON context to another. + * @dst: The commit destination DAMON context. + * @src: The commit source DAMON context. + * + * This function copies user-specified parameters from @src to @dst and update + * the internal status and results accordingly. Users should use this function + * for context-level parameters update of running context, instead of manual + * in-place updates. + * + * This function should be called from parameters-update safe context, like + * damon_call(). + */ +int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) +{ + int err; + + err = damon_commit_schemes(dst, src); + if (err) + return err; + err = damon_commit_targets(dst, src); + if (err) + return err; + /* + * schemes and targets should be updated first, since + * 1. damon_set_attrs() updates monitoring results of targets and + * next_apply_sis of schemes, and + * 2. ops update should be done after pid handling is done (target + * committing require putting pids). + */ + if (!damon_attrs_equals(&dst->attrs, &src->attrs)) { + err = damon_set_attrs(dst, &src->attrs); + if (err) + return err; + } + dst->ops = src->ops; + dst->addr_unit = src->addr_unit; + dst->min_sz_region = src->min_sz_region; + + return 0; +} + +/** + * damon_nr_running_ctxs() - Return number of currently running contexts. + */ +int damon_nr_running_ctxs(void) +{ + int nr_ctxs; + + mutex_lock(&damon_lock); + nr_ctxs = nr_running_ctxs; + mutex_unlock(&damon_lock); + + return nr_ctxs; +} + +/* Returns the size upper limit for each monitoring region */ +static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sz = 0; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + sz += damon_sz_region(r); + } + + if (ctx->attrs.min_nr_regions) + sz /= ctx->attrs.min_nr_regions; + if (sz < ctx->min_sz_region) + sz = ctx->min_sz_region; + + return sz; +} + +static int kdamond_fn(void *data); + +/* + * __damon_start() - Starts monitoring with given context. + * @ctx: monitoring context + * + * This function should be called while damon_lock is hold. + * + * Return: 0 on success, negative error code otherwise. + */ +static int __damon_start(struct damon_ctx *ctx) +{ + int err = -EBUSY; + + mutex_lock(&ctx->kdamond_lock); + if (!ctx->kdamond) { + err = 0; + reinit_completion(&ctx->kdamond_started); + ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", + nr_running_ctxs); + if (IS_ERR(ctx->kdamond)) { + err = PTR_ERR(ctx->kdamond); + ctx->kdamond = NULL; + } else { + wait_for_completion(&ctx->kdamond_started); + } + } + mutex_unlock(&ctx->kdamond_lock); + + return err; +} + +/** + * damon_start() - Starts the monitorings for a given group of contexts. + * @ctxs: an array of the pointers for contexts to start monitoring + * @nr_ctxs: size of @ctxs + * @exclusive: exclusiveness of this contexts group + * + * This function starts a group of monitoring threads for a group of monitoring + * contexts. One thread per each context is created and run in parallel. The + * caller should handle synchronization between the threads by itself. If + * @exclusive is true and a group of threads that created by other + * 'damon_start()' call is currently running, this function does nothing but + * returns -EBUSY. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive) +{ + int i; + int err = 0; + + mutex_lock(&damon_lock); + if ((exclusive && nr_running_ctxs) || + (!exclusive && running_exclusive_ctxs)) { + mutex_unlock(&damon_lock); + return -EBUSY; + } + + for (i = 0; i < nr_ctxs; i++) { + err = __damon_start(ctxs[i]); + if (err) + break; + nr_running_ctxs++; + } + if (exclusive && nr_running_ctxs) + running_exclusive_ctxs = true; + mutex_unlock(&damon_lock); + + return err; +} + +/* + * __damon_stop() - Stops monitoring of a given context. + * @ctx: monitoring context + * + * Return: 0 on success, negative error code otherwise. + */ +static int __damon_stop(struct damon_ctx *ctx) +{ + struct task_struct *tsk; + + mutex_lock(&ctx->kdamond_lock); + tsk = ctx->kdamond; + if (tsk) { + get_task_struct(tsk); + mutex_unlock(&ctx->kdamond_lock); + kthread_stop_put(tsk); + return 0; + } + mutex_unlock(&ctx->kdamond_lock); + + return -EPERM; +} + +/** + * damon_stop() - Stops the monitorings for a given group of contexts. + * @ctxs: an array of the pointers for contexts to stop monitoring + * @nr_ctxs: size of @ctxs + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) +{ + int i, err = 0; + + for (i = 0; i < nr_ctxs; i++) { + /* nr_running_ctxs is decremented in kdamond_fn */ + err = __damon_stop(ctxs[i]); + if (err) + break; + } + return err; +} + +/** + * damon_is_running() - Returns if a given DAMON context is running. + * @ctx: The DAMON context to see if running. + * + * Return: true if @ctx is running, false otherwise. + */ +bool damon_is_running(struct damon_ctx *ctx) +{ + bool running; + + mutex_lock(&ctx->kdamond_lock); + running = ctx->kdamond != NULL; + mutex_unlock(&ctx->kdamond_lock); + return running; +} + +/** + * damon_call() - Invoke a given function on DAMON worker thread (kdamond). + * @ctx: DAMON context to call the function for. + * @control: Control variable of the call request. + * + * Ask DAMON worker thread (kdamond) of @ctx to call a function with an + * argument data that respectively passed via &damon_call_control->fn and + * &damon_call_control->data of @control. If &damon_call_control->repeat of + * @control is unset, further wait until the kdamond finishes handling of the + * request. Otherwise, return as soon as the request is made. + * + * The kdamond executes the function with the argument in the main loop, just + * after a sampling of the iteration is finished. The function can hence + * safely access the internal data of the &struct damon_ctx without additional + * synchronization. The return value of the function will be saved in + * &damon_call_control->return_code. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) +{ + if (!control->repeat) + init_completion(&control->completion); + control->canceled = false; + INIT_LIST_HEAD(&control->list); + + mutex_lock(&ctx->call_controls_lock); + list_add_tail(&control->list, &ctx->call_controls); + mutex_unlock(&ctx->call_controls_lock); + if (!damon_is_running(ctx)) + return -EINVAL; + if (control->repeat) + return 0; + wait_for_completion(&control->completion); + if (control->canceled) + return -ECANCELED; + return 0; +} + +/** + * damos_walk() - Invoke a given functions while DAMOS walk regions. + * @ctx: DAMON context to call the functions for. + * @control: Control variable of the walk request. + * + * Ask DAMON worker thread (kdamond) of @ctx to call a function for each region + * that the kdamond will apply DAMOS action to, and wait until the kdamond + * finishes handling of the request. + * + * The kdamond executes the given function in the main loop, for each region + * just after it applied any DAMOS actions of @ctx to it. The invocation is + * made only within one &damos->apply_interval_us since damos_walk() + * invocation, for each scheme. The given callback function can hence safely + * access the internal data of &struct damon_ctx and &struct damon_region that + * each of the scheme will apply the action for next interval, without + * additional synchronizations against the kdamond. If every scheme of @ctx + * passed at least one &damos->apply_interval_us, kdamond marks the request as + * completed so that damos_walk() can wakeup and return. + * + * Return: 0 on success, negative error code otherwise. + */ +int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control) +{ + init_completion(&control->completion); + control->canceled = false; + mutex_lock(&ctx->walk_control_lock); + if (ctx->walk_control) { + mutex_unlock(&ctx->walk_control_lock); + return -EBUSY; + } + ctx->walk_control = control; + mutex_unlock(&ctx->walk_control_lock); + if (!damon_is_running(ctx)) + return -EINVAL; + wait_for_completion(&control->completion); + if (control->canceled) + return -ECANCELED; + return 0; +} + +/* + * Warn and fix corrupted ->nr_accesses[_bp] for investigations and preventing + * the problem being propagated. + */ +static void damon_warn_fix_nr_accesses_corruption(struct damon_region *r) +{ + if (r->nr_accesses_bp == r->nr_accesses * 10000) + return; + WARN_ONCE(true, "invalid nr_accesses_bp at reset: %u %u\n", + r->nr_accesses_bp, r->nr_accesses); + r->nr_accesses_bp = r->nr_accesses * 10000; +} + +/* + * Reset the aggregated monitoring results ('nr_accesses' of each region). + */ +static void kdamond_reset_aggregated(struct damon_ctx *c) +{ + struct damon_target *t; + unsigned int ti = 0; /* target's index */ + + damon_for_each_target(t, c) { + struct damon_region *r; + + damon_for_each_region(r, t) { + trace_damon_aggregated(ti, r, damon_nr_regions(t)); + damon_warn_fix_nr_accesses_corruption(r); + r->last_nr_accesses = r->nr_accesses; + r->nr_accesses = 0; + } + ti++; + } +} + +static unsigned long damon_get_intervals_score(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sz_region, max_access_events = 0, access_events = 0; + unsigned long target_access_events; + unsigned long goal_bp = c->attrs.intervals_goal.access_bp; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + sz_region = damon_sz_region(r); + max_access_events += sz_region * c->attrs.aggr_samples; + access_events += sz_region * r->nr_accesses; + } + } + target_access_events = max_access_events * goal_bp / 10000; + target_access_events = target_access_events ? : 1; + return access_events * 10000 / target_access_events; +} + +static unsigned long damon_feed_loop_next_input(unsigned long last_input, + unsigned long score); + +static unsigned long damon_get_intervals_adaptation_bp(struct damon_ctx *c) +{ + unsigned long score_bp, adaptation_bp; + + score_bp = damon_get_intervals_score(c); + adaptation_bp = damon_feed_loop_next_input(100000000, score_bp) / + 10000; + /* + * adaptaion_bp ranges from 1 to 20,000. Avoid too rapid reduction of + * the intervals by rescaling [1,10,000] to [5000, 10,000]. + */ + if (adaptation_bp <= 10000) + adaptation_bp = 5000 + adaptation_bp / 2; + return adaptation_bp; +} + +static void kdamond_tune_intervals(struct damon_ctx *c) +{ + unsigned long adaptation_bp; + struct damon_attrs new_attrs; + struct damon_intervals_goal *goal; + + adaptation_bp = damon_get_intervals_adaptation_bp(c); + if (adaptation_bp == 10000) + return; + + new_attrs = c->attrs; + goal = &c->attrs.intervals_goal; + new_attrs.sample_interval = min(goal->max_sample_us, + c->attrs.sample_interval * adaptation_bp / 10000); + new_attrs.sample_interval = max(goal->min_sample_us, + new_attrs.sample_interval); + new_attrs.aggr_interval = new_attrs.sample_interval * + c->attrs.aggr_samples; + trace_damon_monitor_intervals_tune(new_attrs.sample_interval); + damon_set_attrs(c, &new_attrs); +} + +static void damon_split_region_at(struct damon_target *t, + struct damon_region *r, unsigned long sz_r); + +static bool __damos_valid_target(struct damon_region *r, struct damos *s) +{ + unsigned long sz; + unsigned int nr_accesses = r->nr_accesses_bp / 10000; + + sz = damon_sz_region(r); + return s->pattern.min_sz_region <= sz && + sz <= s->pattern.max_sz_region && + s->pattern.min_nr_accesses <= nr_accesses && + nr_accesses <= s->pattern.max_nr_accesses && + s->pattern.min_age_region <= r->age && + r->age <= s->pattern.max_age_region; +} + +static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + bool ret = __damos_valid_target(r, s); + + if (!ret || !s->quota.esz || !c->ops.get_scheme_score) + return ret; + + return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score; +} + +/* + * damos_skip_charged_region() - Check if the given region or starting part of + * it is already charged for the DAMOS quota. + * @t: The target of the region. + * @rp: The pointer to the region. + * @s: The scheme to be applied. + * @min_sz_region: minimum region size. + * + * If a quota of a scheme has exceeded in a quota charge window, the scheme's + * action would applied to only a part of the target access pattern fulfilling + * regions. To avoid applying the scheme action to only already applied + * regions, DAMON skips applying the scheme action to the regions that charged + * in the previous charge window. + * + * This function checks if a given region should be skipped or not for the + * reason. If only the starting part of the region has previously charged, + * this function splits the region into two so that the second one covers the + * area that not charged in the previous charge widnow and saves the second + * region in *rp and returns false, so that the caller can apply DAMON action + * to the second one. + * + * Return: true if the region should be entirely skipped, false otherwise. + */ +static bool damos_skip_charged_region(struct damon_target *t, + struct damon_region **rp, struct damos *s, unsigned long min_sz_region) +{ + struct damon_region *r = *rp; + struct damos_quota *quota = &s->quota; + unsigned long sz_to_skip; + + /* Skip previously charged regions */ + if (quota->charge_target_from) { + if (t != quota->charge_target_from) + return true; + if (r == damon_last_region(t)) { + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + return true; + } + if (quota->charge_addr_from && + r->ar.end <= quota->charge_addr_from) + return true; + + if (quota->charge_addr_from && r->ar.start < + quota->charge_addr_from) { + sz_to_skip = ALIGN_DOWN(quota->charge_addr_from - + r->ar.start, min_sz_region); + if (!sz_to_skip) { + if (damon_sz_region(r) <= min_sz_region) + return true; + sz_to_skip = min_sz_region; + } + damon_split_region_at(t, r, sz_to_skip); + r = damon_next_region(r); + *rp = r; + } + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + } + return false; +} + +static void damos_update_stat(struct damos *s, + unsigned long sz_tried, unsigned long sz_applied, + unsigned long sz_ops_filter_passed) +{ + s->stat.nr_tried++; + s->stat.sz_tried += sz_tried; + if (sz_applied) + s->stat.nr_applied++; + s->stat.sz_applied += sz_applied; + s->stat.sz_ops_filter_passed += sz_ops_filter_passed; +} + +static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos_filter *filter, + unsigned long min_sz_region) +{ + bool matched = false; + struct damon_target *ti; + int target_idx = 0; + unsigned long start, end; + + switch (filter->type) { + case DAMOS_FILTER_TYPE_TARGET: + damon_for_each_target(ti, ctx) { + if (ti == t) + break; + target_idx++; + } + matched = target_idx == filter->target_idx; + break; + case DAMOS_FILTER_TYPE_ADDR: + start = ALIGN_DOWN(filter->addr_range.start, min_sz_region); + end = ALIGN_DOWN(filter->addr_range.end, min_sz_region); + + /* inside the range */ + if (start <= r->ar.start && r->ar.end <= end) { + matched = true; + break; + } + /* outside of the range */ + if (r->ar.end <= start || end <= r->ar.start) { + matched = false; + break; + } + /* start before the range and overlap */ + if (r->ar.start < start) { + damon_split_region_at(t, r, start - r->ar.start); + matched = false; + break; + } + /* start inside the range */ + damon_split_region_at(t, r, end - r->ar.start); + matched = true; + break; + default: + return false; + } + + return matched == filter->matching; +} + +static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + struct damos_filter *filter; + + s->core_filters_allowed = false; + damos_for_each_core_filter(filter, s) { + if (damos_filter_match(ctx, t, r, filter, ctx->min_sz_region)) { + if (filter->allow) + s->core_filters_allowed = true; + return !filter->allow; + } + } + return s->core_filters_default_reject; +} + +/* + * damos_walk_call_walk() - Call &damos_walk_control->walk_fn. + * @ctx: The context of &damon_ctx->walk_control. + * @t: The monitoring target of @r that @s will be applied. + * @r: The region of @t that @s will be applied. + * @s: The scheme of @ctx that will be applied to @r. + * + * This function is called from kdamond whenever it asked the operation set to + * apply a DAMOS scheme action to a region. If a DAMOS walk request is + * installed by damos_walk() and not yet uninstalled, invoke it. + */ +static void damos_walk_call_walk(struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos *s, + unsigned long sz_filter_passed) +{ + struct damos_walk_control *control; + + if (s->walk_completed) + return; + + control = ctx->walk_control; + if (!control) + return; + + control->walk_fn(control->data, ctx, t, r, s, sz_filter_passed); +} + +/* + * damos_walk_complete() - Complete DAMOS walk request if all walks are done. + * @ctx: The context of &damon_ctx->walk_control. + * @s: A scheme of @ctx that all walks are now done. + * + * This function is called when kdamond finished applying the action of a DAMOS + * scheme to all regions that eligible for the given &damos->apply_interval_us. + * If every scheme of @ctx including @s now finished walking for at least one + * &damos->apply_interval_us, this function makrs the handling of the given + * DAMOS walk request is done, so that damos_walk() can wake up and return. + */ +static void damos_walk_complete(struct damon_ctx *ctx, struct damos *s) +{ + struct damos *siter; + struct damos_walk_control *control; + + control = ctx->walk_control; + if (!control) + return; + + s->walk_completed = true; + /* if all schemes completed, signal completion to walker */ + damon_for_each_scheme(siter, ctx) { + if (!siter->walk_completed) + return; + } + damon_for_each_scheme(siter, ctx) + siter->walk_completed = false; + + complete(&control->completion); + ctx->walk_control = NULL; +} + +/* + * damos_walk_cancel() - Cancel the current DAMOS walk request. + * @ctx: The context of &damon_ctx->walk_control. + * + * This function is called when @ctx is deactivated by DAMOS watermarks, DAMOS + * walk is requested but there is no DAMOS scheme to walk for, or the kdamond + * is already out of the main loop and therefore gonna be terminated, and hence + * cannot continue the walks. This function therefore marks the walk request + * as canceled, so that damos_walk() can wake up and return. + */ +static void damos_walk_cancel(struct damon_ctx *ctx) +{ + struct damos_walk_control *control; + + mutex_lock(&ctx->walk_control_lock); + control = ctx->walk_control; + mutex_unlock(&ctx->walk_control_lock); + + if (!control) + return; + control->canceled = true; + complete(&control->completion); + mutex_lock(&ctx->walk_control_lock); + ctx->walk_control = NULL; + mutex_unlock(&ctx->walk_control_lock); +} + +static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + struct damos_quota *quota = &s->quota; + unsigned long sz = damon_sz_region(r); + struct timespec64 begin, end; + unsigned long sz_applied = 0; + unsigned long sz_ops_filter_passed = 0; + /* + * We plan to support multiple context per kdamond, as DAMON sysfs + * implies with 'nr_contexts' file. Nevertheless, only single context + * per kdamond is supported for now. So, we can simply use '0' context + * index here. + */ + unsigned int cidx = 0; + struct damos *siter; /* schemes iterator */ + unsigned int sidx = 0; + struct damon_target *titer; /* targets iterator */ + unsigned int tidx = 0; + bool do_trace = false; + + /* get indices for trace_damos_before_apply() */ + if (trace_damos_before_apply_enabled()) { + damon_for_each_scheme(siter, c) { + if (siter == s) + break; + sidx++; + } + damon_for_each_target(titer, c) { + if (titer == t) + break; + tidx++; + } + do_trace = true; + } + + if (c->ops.apply_scheme) { + if (quota->esz && quota->charged_sz + sz > quota->esz) { + sz = ALIGN_DOWN(quota->esz - quota->charged_sz, + c->min_sz_region); + if (!sz) + goto update_stat; + damon_split_region_at(t, r, sz); + } + if (damos_filter_out(c, t, r, s)) + return; + ktime_get_coarse_ts64(&begin); + trace_damos_before_apply(cidx, sidx, tidx, r, + damon_nr_regions(t), do_trace); + sz_applied = c->ops.apply_scheme(c, t, r, s, + &sz_ops_filter_passed); + damos_walk_call_walk(c, t, r, s, sz_ops_filter_passed); + ktime_get_coarse_ts64(&end); + quota->total_charged_ns += timespec64_to_ns(&end) - + timespec64_to_ns(&begin); + quota->charged_sz += sz; + if (quota->esz && quota->charged_sz >= quota->esz) { + quota->charge_target_from = t; + quota->charge_addr_from = r->ar.end + 1; + } + } + if (s->action != DAMOS_STAT) + r->age = 0; + +update_stat: + damos_update_stat(s, sz, sz_applied, sz_ops_filter_passed); +} + +static void damon_do_apply_schemes(struct damon_ctx *c, + struct damon_target *t, + struct damon_region *r) +{ + struct damos *s; + + damon_for_each_scheme(s, c) { + struct damos_quota *quota = &s->quota; + + if (c->passed_sample_intervals < s->next_apply_sis) + continue; + + if (!s->wmarks.activated) + continue; + + /* Check the quota */ + if (quota->esz && quota->charged_sz >= quota->esz) + continue; + + if (damos_skip_charged_region(t, &r, s, c->min_sz_region)) + continue; + + if (!damos_valid_target(c, t, r, s)) + continue; + + damos_apply_scheme(c, t, r, s); + } +} + +/* + * damon_feed_loop_next_input() - get next input to achieve a target score. + * @last_input The last input. + * @score Current score that made with @last_input. + * + * Calculate next input to achieve the target score, based on the last input + * and current score. Assuming the input and the score are positively + * proportional, calculate how much compensation should be added to or + * subtracted from the last input as a proportion of the last input. Avoid + * next input always being zero by setting it non-zero always. In short form + * (assuming support of float and signed calculations), the algorithm is as + * below. + * + * next_input = max(last_input * ((goal - current) / goal + 1), 1) + * + * For simple implementation, we assume the target score is always 10,000. The + * caller should adjust @score for this. + * + * Returns next input that assumed to achieve the target score. + */ +static unsigned long damon_feed_loop_next_input(unsigned long last_input, + unsigned long score) +{ + const unsigned long goal = 10000; + /* Set minimum input as 10000 to avoid compensation be zero */ + const unsigned long min_input = 10000; + unsigned long score_goal_diff, compensation; + bool over_achieving = score > goal; + + if (score == goal) + return last_input; + if (score >= goal * 2) + return min_input; + + if (over_achieving) + score_goal_diff = score - goal; + else + score_goal_diff = goal - score; + + if (last_input < ULONG_MAX / score_goal_diff) + compensation = last_input * score_goal_diff / goal; + else + compensation = last_input / goal * score_goal_diff; + + if (over_achieving) + return max(last_input - compensation, min_input); + if (last_input < ULONG_MAX - compensation) + return last_input + compensation; + return ULONG_MAX; +} + +#ifdef CONFIG_PSI + +static u64 damos_get_some_mem_psi_total(void) +{ + if (static_branch_likely(&psi_disabled)) + return 0; + return div_u64(psi_system.total[PSI_AVGS][PSI_MEM * 2], + NSEC_PER_USEC); +} + +#else /* CONFIG_PSI */ + +static inline u64 damos_get_some_mem_psi_total(void) +{ + return 0; +}; + +#endif /* CONFIG_PSI */ + +#ifdef CONFIG_NUMA +static __kernel_ulong_t damos_get_node_mem_bp( + struct damos_quota_goal *goal) +{ + struct sysinfo i; + __kernel_ulong_t numerator; + + si_meminfo_node(&i, goal->nid); + if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP) + numerator = i.totalram - i.freeram; + else /* DAMOS_QUOTA_NODE_MEM_FREE_BP */ + numerator = i.freeram; + return numerator * 10000 / i.totalram; +} + +static unsigned long damos_get_node_memcg_used_bp( + struct damos_quota_goal *goal) +{ + struct mem_cgroup *memcg; + struct lruvec *lruvec; + unsigned long used_pages, numerator; + struct sysinfo i; + + rcu_read_lock(); + memcg = mem_cgroup_from_id(goal->memcg_id); + rcu_read_unlock(); + if (!memcg) { + if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) + return 0; + else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */ + return 10000; + } + mem_cgroup_flush_stats(memcg); + lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(goal->nid)); + used_pages = lruvec_page_state(lruvec, NR_ACTIVE_ANON); + used_pages += lruvec_page_state(lruvec, NR_INACTIVE_ANON); + used_pages += lruvec_page_state(lruvec, NR_ACTIVE_FILE); + used_pages += lruvec_page_state(lruvec, NR_INACTIVE_FILE); + + si_meminfo_node(&i, goal->nid); + if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) + numerator = used_pages; + else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */ + numerator = i.totalram - used_pages; + return numerator * 10000 / i.totalram; +} +#else +static __kernel_ulong_t damos_get_node_mem_bp( + struct damos_quota_goal *goal) +{ + return 0; +} + +static unsigned long damos_get_node_memcg_used_bp( + struct damos_quota_goal *goal) +{ + return 0; +} +#endif + + +static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal) +{ + u64 now_psi_total; + + switch (goal->metric) { + case DAMOS_QUOTA_USER_INPUT: + /* User should already set goal->current_value */ + break; + case DAMOS_QUOTA_SOME_MEM_PSI_US: + now_psi_total = damos_get_some_mem_psi_total(); + goal->current_value = now_psi_total - goal->last_psi_total; + goal->last_psi_total = now_psi_total; + break; + case DAMOS_QUOTA_NODE_MEM_USED_BP: + case DAMOS_QUOTA_NODE_MEM_FREE_BP: + goal->current_value = damos_get_node_mem_bp(goal); + break; + case DAMOS_QUOTA_NODE_MEMCG_USED_BP: + case DAMOS_QUOTA_NODE_MEMCG_FREE_BP: + goal->current_value = damos_get_node_memcg_used_bp(goal); + break; + default: + break; + } +} + +/* Return the highest score since it makes schemes least aggressive */ +static unsigned long damos_quota_score(struct damos_quota *quota) +{ + struct damos_quota_goal *goal; + unsigned long highest_score = 0; + + damos_for_each_quota_goal(goal, quota) { + damos_set_quota_goal_current_value(goal); + highest_score = max(highest_score, + goal->current_value * 10000 / + goal->target_value); + } + + return highest_score; +} + +/* + * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty + */ +static void damos_set_effective_quota(struct damos_quota *quota) +{ + unsigned long throughput; + unsigned long esz = ULONG_MAX; + + if (!quota->ms && list_empty("a->goals)) { + quota->esz = quota->sz; + return; + } + + if (!list_empty("a->goals)) { + unsigned long score = damos_quota_score(quota); + + quota->esz_bp = damon_feed_loop_next_input( + max(quota->esz_bp, 10000UL), + score); + esz = quota->esz_bp / 10000; + } + + if (quota->ms) { + if (quota->total_charged_ns) + throughput = mult_frac(quota->total_charged_sz, 1000000, + quota->total_charged_ns); + else + throughput = PAGE_SIZE * 1024; + esz = min(throughput * quota->ms, esz); + } + + if (quota->sz && quota->sz < esz) + esz = quota->sz; + + quota->esz = esz; +} + +static void damos_trace_esz(struct damon_ctx *c, struct damos *s, + struct damos_quota *quota) +{ + unsigned int cidx = 0, sidx = 0; + struct damos *siter; + + damon_for_each_scheme(siter, c) { + if (siter == s) + break; + sidx++; + } + trace_damos_esz(cidx, sidx, quota->esz); +} + +static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) +{ + struct damos_quota *quota = &s->quota; + struct damon_target *t; + struct damon_region *r; + unsigned long cumulated_sz, cached_esz; + unsigned int score, max_score = 0; + + if (!quota->ms && !quota->sz && list_empty("a->goals)) + return; + + /* First charge window */ + if (!quota->total_charged_sz && !quota->charged_from) { + quota->charged_from = jiffies; + damos_set_effective_quota(quota); + } + + /* New charge window starts */ + if (time_after_eq(jiffies, quota->charged_from + + msecs_to_jiffies(quota->reset_interval))) { + if (quota->esz && quota->charged_sz >= quota->esz) + s->stat.qt_exceeds++; + quota->total_charged_sz += quota->charged_sz; + quota->charged_from = jiffies; + quota->charged_sz = 0; + if (trace_damos_esz_enabled()) + cached_esz = quota->esz; + damos_set_effective_quota(quota); + if (trace_damos_esz_enabled() && quota->esz != cached_esz) + damos_trace_esz(c, s, quota); + } + + if (!c->ops.get_scheme_score) + return; + + /* Fill up the score histogram */ + memset(c->regions_score_histogram, 0, + sizeof(*c->regions_score_histogram) * + (DAMOS_MAX_SCORE + 1)); + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + if (!__damos_valid_target(r, s)) + continue; + score = c->ops.get_scheme_score(c, t, r, s); + c->regions_score_histogram[score] += + damon_sz_region(r); + if (score > max_score) + max_score = score; + } + } + + /* Set the min score limit */ + for (cumulated_sz = 0, score = max_score; ; score--) { + cumulated_sz += c->regions_score_histogram[score]; + if (cumulated_sz >= quota->esz || !score) + break; + } + quota->min_score = score; +} + +static void kdamond_apply_schemes(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r, *next_r; + struct damos *s; + unsigned long sample_interval = c->attrs.sample_interval ? + c->attrs.sample_interval : 1; + bool has_schemes_to_apply = false; + + damon_for_each_scheme(s, c) { + if (c->passed_sample_intervals < s->next_apply_sis) + continue; + + if (!s->wmarks.activated) + continue; + + has_schemes_to_apply = true; + + damos_adjust_quota(c, s); + } + + if (!has_schemes_to_apply) + return; + + mutex_lock(&c->walk_control_lock); + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next_r, t) + damon_do_apply_schemes(c, t, r); + } + + damon_for_each_scheme(s, c) { + if (c->passed_sample_intervals < s->next_apply_sis) + continue; + damos_walk_complete(c, s); + s->next_apply_sis = c->passed_sample_intervals + + (s->apply_interval_us ? s->apply_interval_us : + c->attrs.aggr_interval) / sample_interval; + s->last_applied = NULL; + } + mutex_unlock(&c->walk_control_lock); +} + +/* + * Merge two adjacent regions into one region + */ +static void damon_merge_two_regions(struct damon_target *t, + struct damon_region *l, struct damon_region *r) +{ + unsigned long sz_l = damon_sz_region(l), sz_r = damon_sz_region(r); + + l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / + (sz_l + sz_r); + l->nr_accesses_bp = l->nr_accesses * 10000; + l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); + l->ar.end = r->ar.end; + damon_destroy_region(r, t); +} + +/* + * Merge adjacent regions having similar access frequencies + * + * t target affected by this merge operation + * thres '->nr_accesses' diff threshold for the merge + * sz_limit size upper limit of each region + */ +static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, + unsigned long sz_limit) +{ + struct damon_region *r, *prev = NULL, *next; + + damon_for_each_region_safe(r, next, t) { + if (abs(r->nr_accesses - r->last_nr_accesses) > thres) + r->age = 0; + else if ((r->nr_accesses == 0) != (r->last_nr_accesses == 0)) + r->age = 0; + else + r->age++; + + if (prev && prev->ar.end == r->ar.start && + abs(prev->nr_accesses - r->nr_accesses) <= thres && + damon_sz_region(prev) + damon_sz_region(r) <= sz_limit) + damon_merge_two_regions(t, prev, r); + else + prev = r; + } +} + +/* + * Merge adjacent regions having similar access frequencies + * + * threshold '->nr_accesses' diff threshold for the merge + * sz_limit size upper limit of each region + * + * This function merges monitoring target regions which are adjacent and their + * access frequencies are similar. This is for minimizing the monitoring + * overhead under the dynamically changeable access pattern. If a merge was + * unnecessarily made, later 'kdamond_split_regions()' will revert it. + * + * The total number of regions could be higher than the user-defined limit, + * max_nr_regions for some cases. For example, the user can update + * max_nr_regions to a number that lower than the current number of regions + * while DAMON is running. For such a case, repeat merging until the limit is + * met while increasing @threshold up to possible maximum level. + */ +static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, + unsigned long sz_limit) +{ + struct damon_target *t; + unsigned int nr_regions; + unsigned int max_thres; + + max_thres = c->attrs.aggr_interval / + (c->attrs.sample_interval ? c->attrs.sample_interval : 1); + do { + nr_regions = 0; + damon_for_each_target(t, c) { + damon_merge_regions_of(t, threshold, sz_limit); + nr_regions += damon_nr_regions(t); + } + threshold = max(1, threshold * 2); + } while (nr_regions > c->attrs.max_nr_regions && + threshold / 2 < max_thres); +} + +/* + * Split a region in two + * + * r the region to be split + * sz_r size of the first sub-region that will be made + */ +static void damon_split_region_at(struct damon_target *t, + struct damon_region *r, unsigned long sz_r) +{ + struct damon_region *new; + + new = damon_new_region(r->ar.start + sz_r, r->ar.end); + if (!new) + return; + + r->ar.end = new->ar.start; + + new->age = r->age; + new->last_nr_accesses = r->last_nr_accesses; + new->nr_accesses_bp = r->nr_accesses_bp; + new->nr_accesses = r->nr_accesses; + + damon_insert_region(new, r, damon_next_region(r), t); +} + +/* Split every region in the given target into 'nr_subs' regions */ +static void damon_split_regions_of(struct damon_target *t, int nr_subs, + unsigned long min_sz_region) +{ + struct damon_region *r, *next; + unsigned long sz_region, sz_sub = 0; + int i; + + damon_for_each_region_safe(r, next, t) { + sz_region = damon_sz_region(r); + + for (i = 0; i < nr_subs - 1 && + sz_region > 2 * min_sz_region; i++) { + /* + * Randomly select size of left sub-region to be at + * least 10 percent and at most 90% of original region + */ + sz_sub = ALIGN_DOWN(damon_rand(1, 10) * + sz_region / 10, min_sz_region); + /* Do not allow blank region */ + if (sz_sub == 0 || sz_sub >= sz_region) + continue; + + damon_split_region_at(t, r, sz_sub); + sz_region = sz_sub; + } + } +} + +/* + * Split every target region into randomly-sized small regions + * + * This function splits every target region into random-sized small regions if + * current total number of the regions is equal or smaller than half of the + * user-specified maximum number of regions. This is for maximizing the + * monitoring accuracy under the dynamically changeable access patterns. If a + * split was unnecessarily made, later 'kdamond_merge_regions()' will revert + * it. + */ +static void kdamond_split_regions(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_regions = 0; + static unsigned int last_nr_regions; + int nr_subregions = 2; + + damon_for_each_target(t, ctx) + nr_regions += damon_nr_regions(t); + + if (nr_regions > ctx->attrs.max_nr_regions / 2) + return; + + /* Maybe the middle of the region has different access frequency */ + if (last_nr_regions == nr_regions && + nr_regions < ctx->attrs.max_nr_regions / 3) + nr_subregions = 3; + + damon_for_each_target(t, ctx) + damon_split_regions_of(t, nr_subregions, ctx->min_sz_region); + + last_nr_regions = nr_regions; +} + +/* + * Check whether current monitoring should be stopped + * + * The monitoring is stopped when either the user requested to stop, or all + * monitoring targets are invalid. + * + * Returns true if need to stop current monitoring. + */ +static bool kdamond_need_stop(struct damon_ctx *ctx) +{ + struct damon_target *t; + + if (kthread_should_stop()) + return true; + + if (!ctx->ops.target_valid) + return false; + + damon_for_each_target(t, ctx) { + if (ctx->ops.target_valid(t)) + return false; + } + + return true; +} + +static int damos_get_wmark_metric_value(enum damos_wmark_metric metric, + unsigned long *metric_value) +{ + switch (metric) { + case DAMOS_WMARK_FREE_MEM_RATE: + *metric_value = global_zone_page_state(NR_FREE_PAGES) * 1000 / + totalram_pages(); + return 0; + default: + break; + } + return -EINVAL; +} + +/* + * Returns zero if the scheme is active. Else, returns time to wait for next + * watermark check in micro-seconds. + */ +static unsigned long damos_wmark_wait_us(struct damos *scheme) +{ + unsigned long metric; + + if (damos_get_wmark_metric_value(scheme->wmarks.metric, &metric)) + return 0; + + /* higher than high watermark or lower than low watermark */ + if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { + if (scheme->wmarks.activated) + pr_debug("deactivate a scheme (%d) for %s wmark\n", + scheme->action, + str_high_low(metric > scheme->wmarks.high)); + scheme->wmarks.activated = false; + return scheme->wmarks.interval; + } + + /* inactive and higher than middle watermark */ + if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && + !scheme->wmarks.activated) + return scheme->wmarks.interval; + + if (!scheme->wmarks.activated) + pr_debug("activate a scheme (%d)\n", scheme->action); + scheme->wmarks.activated = true; + return 0; +} + +static void kdamond_usleep(unsigned long usecs) +{ + if (usecs >= USLEEP_RANGE_UPPER_BOUND) + schedule_timeout_idle(usecs_to_jiffies(usecs)); + else + usleep_range_idle(usecs, usecs + 1); +} + +/* + * kdamond_call() - handle damon_call_control objects. + * @ctx: The &struct damon_ctx of the kdamond. + * @cancel: Whether to cancel the invocation of the function. + * + * If there are &struct damon_call_control requests that registered via + * &damon_call() on @ctx, do or cancel the invocation of the function depending + * on @cancel. @cancel is set when the kdamond is already out of the main loop + * and therefore will be terminated. + */ +static void kdamond_call(struct damon_ctx *ctx, bool cancel) +{ + struct damon_call_control *control; + LIST_HEAD(repeat_controls); + int ret = 0; + + while (true) { + mutex_lock(&ctx->call_controls_lock); + control = list_first_entry_or_null(&ctx->call_controls, + struct damon_call_control, list); + mutex_unlock(&ctx->call_controls_lock); + if (!control) + break; + if (cancel) { + control->canceled = true; + } else { + ret = control->fn(control->data); + control->return_code = ret; + } + mutex_lock(&ctx->call_controls_lock); + list_del(&control->list); + mutex_unlock(&ctx->call_controls_lock); + if (!control->repeat) { + complete(&control->completion); + } else if (control->canceled && control->dealloc_on_cancel) { + kfree(control); + continue; + } else { + list_add(&control->list, &repeat_controls); + } + } + control = list_first_entry_or_null(&repeat_controls, + struct damon_call_control, list); + if (!control || cancel) + return; + mutex_lock(&ctx->call_controls_lock); + list_add_tail(&control->list, &ctx->call_controls); + mutex_unlock(&ctx->call_controls_lock); +} + +/* Returns negative error code if it's not activated but should return */ +static int kdamond_wait_activation(struct damon_ctx *ctx) +{ + struct damos *s; + unsigned long wait_time; + unsigned long min_wait_time = 0; + bool init_wait_time = false; + + while (!kdamond_need_stop(ctx)) { + damon_for_each_scheme(s, ctx) { + wait_time = damos_wmark_wait_us(s); + if (!init_wait_time || wait_time < min_wait_time) { + init_wait_time = true; + min_wait_time = wait_time; + } + } + if (!min_wait_time) + return 0; + + kdamond_usleep(min_wait_time); + + kdamond_call(ctx, false); + damos_walk_cancel(ctx); + } + return -EBUSY; +} + +static void kdamond_init_ctx(struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + unsigned long apply_interval; + struct damos *scheme; + + ctx->passed_sample_intervals = 0; + ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->attrs.ops_update_interval / + sample_interval; + ctx->next_intervals_tune_sis = ctx->next_aggregation_sis * + ctx->attrs.intervals_goal.aggrs; + + damon_for_each_scheme(scheme, ctx) { + apply_interval = scheme->apply_interval_us ? + scheme->apply_interval_us : ctx->attrs.aggr_interval; + scheme->next_apply_sis = apply_interval / sample_interval; + damos_set_filters_default_reject(scheme); + } +} + +/* + * The monitoring daemon that runs as a kernel thread + */ +static int kdamond_fn(void *data) +{ + struct damon_ctx *ctx = data; + struct damon_target *t; + struct damon_region *r, *next; + unsigned int max_nr_accesses = 0; + unsigned long sz_limit = 0; + + pr_debug("kdamond (%d) starts\n", current->pid); + + complete(&ctx->kdamond_started); + kdamond_init_ctx(ctx); + + if (ctx->ops.init) + ctx->ops.init(ctx); + ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1, + sizeof(*ctx->regions_score_histogram), GFP_KERNEL); + if (!ctx->regions_score_histogram) + goto done; + + sz_limit = damon_region_sz_limit(ctx); + + while (!kdamond_need_stop(ctx)) { + /* + * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could + * be changed from kdamond_call(). Read the values here, and + * use those for this iteration. That is, damon_set_attrs() + * updated new values are respected from next iteration. + */ + unsigned long next_aggregation_sis = ctx->next_aggregation_sis; + unsigned long next_ops_update_sis = ctx->next_ops_update_sis; + unsigned long sample_interval = ctx->attrs.sample_interval; + + if (kdamond_wait_activation(ctx)) + break; + + if (ctx->ops.prepare_access_checks) + ctx->ops.prepare_access_checks(ctx); + + kdamond_usleep(sample_interval); + ctx->passed_sample_intervals++; + + if (ctx->ops.check_accesses) + max_nr_accesses = ctx->ops.check_accesses(ctx); + + if (ctx->passed_sample_intervals >= next_aggregation_sis) + kdamond_merge_regions(ctx, + max_nr_accesses / 10, + sz_limit); + + /* + * do kdamond_call() and kdamond_apply_schemes() after + * kdamond_merge_regions() if possible, to reduce overhead + */ + kdamond_call(ctx, false); + if (!list_empty(&ctx->schemes)) + kdamond_apply_schemes(ctx); + else + damos_walk_cancel(ctx); + + sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + if (ctx->passed_sample_intervals >= next_aggregation_sis) { + if (ctx->attrs.intervals_goal.aggrs && + ctx->passed_sample_intervals >= + ctx->next_intervals_tune_sis) { + /* + * ctx->next_aggregation_sis might be updated + * from kdamond_call(). In the case, + * damon_set_attrs() which will be called from + * kdamond_tune_interval() may wrongly think + * this is in the middle of the current + * aggregation, and make aggregation + * information reset for all regions. Then, + * following kdamond_reset_aggregated() call + * will make the region information invalid, + * particularly for ->nr_accesses_bp. + * + * Reset ->next_aggregation_sis to avoid that. + * It will anyway correctly updated after this + * if caluse. + */ + ctx->next_aggregation_sis = + next_aggregation_sis; + ctx->next_intervals_tune_sis += + ctx->attrs.aggr_samples * + ctx->attrs.intervals_goal.aggrs; + kdamond_tune_intervals(ctx); + sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + + } + ctx->next_aggregation_sis = next_aggregation_sis + + ctx->attrs.aggr_interval / sample_interval; + + kdamond_reset_aggregated(ctx); + kdamond_split_regions(ctx); + } + + if (ctx->passed_sample_intervals >= next_ops_update_sis) { + ctx->next_ops_update_sis = next_ops_update_sis + + ctx->attrs.ops_update_interval / + sample_interval; + if (ctx->ops.update) + ctx->ops.update(ctx); + sz_limit = damon_region_sz_limit(ctx); + } + } +done: + damon_for_each_target(t, ctx) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + + if (ctx->ops.cleanup) + ctx->ops.cleanup(ctx); + kfree(ctx->regions_score_histogram); + + pr_debug("kdamond (%d) finishes\n", current->pid); + mutex_lock(&ctx->kdamond_lock); + ctx->kdamond = NULL; + mutex_unlock(&ctx->kdamond_lock); + + kdamond_call(ctx, true); + damos_walk_cancel(ctx); + + mutex_lock(&damon_lock); + nr_running_ctxs--; + if (!nr_running_ctxs && running_exclusive_ctxs) + running_exclusive_ctxs = false; + mutex_unlock(&damon_lock); + + damon_destroy_targets(ctx); + return 0; +} + +/* + * struct damon_system_ram_region - System RAM resource address region of + * [@start, @end). + * @start: Start address of the region (inclusive). + * @end: End address of the region (exclusive). + */ +struct damon_system_ram_region { + unsigned long start; + unsigned long end; +}; + +static int walk_system_ram(struct resource *res, void *arg) +{ + struct damon_system_ram_region *a = arg; + + if (a->end - a->start < resource_size(res)) { + a->start = res->start; + a->end = res->end; + } + return 0; +} + +/* + * Find biggest 'System RAM' resource and store its start and end address in + * @start and @end, respectively. If no System RAM is found, returns false. + */ +static bool damon_find_biggest_system_ram(unsigned long *start, + unsigned long *end) + +{ + struct damon_system_ram_region arg = {}; + + walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); + if (arg.end <= arg.start) + return false; + + *start = arg.start; + *end = arg.end; + return true; +} + +/** + * damon_set_region_biggest_system_ram_default() - Set the region of the given + * monitoring target as requested, or biggest 'System RAM'. + * @t: The monitoring target to set the region. + * @start: The pointer to the start address of the region. + * @end: The pointer to the end address of the region. + * @min_sz_region: Minimum region size. + * + * This function sets the region of @t as requested by @start and @end. If the + * values of @start and @end are zero, however, this function finds the biggest + * 'System RAM' resource and sets the region to cover the resource. In the + * latter case, this function saves the start and end addresses of the resource + * in @start and @end, respectively. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_set_region_biggest_system_ram_default(struct damon_target *t, + unsigned long *start, unsigned long *end, + unsigned long min_sz_region) +{ + struct damon_addr_range addr_range; + + if (*start > *end) + return -EINVAL; + + if (!*start && !*end && + !damon_find_biggest_system_ram(start, end)) + return -EINVAL; + + addr_range.start = *start; + addr_range.end = *end; + return damon_set_regions(t, &addr_range, 1, min_sz_region); +} + +/* + * damon_moving_sum() - Calculate an inferred moving sum value. + * @mvsum: Inferred sum of the last @len_window values. + * @nomvsum: Non-moving sum of the last discrete @len_window window values. + * @len_window: The number of last values to take care of. + * @new_value: New value that will be added to the pseudo moving sum. + * + * Moving sum (moving average * window size) is good for handling noise, but + * the cost of keeping past values can be high for arbitrary window size. This + * function implements a lightweight pseudo moving sum function that doesn't + * keep the past window values. + * + * It simply assumes there was no noise in the past, and get the no-noise + * assumed past value to drop from @nomvsum and @len_window. @nomvsum is a + * non-moving sum of the last window. For example, if @len_window is 10 and we + * have 25 values, @nomvsum is the sum of the 11th to 20th values of the 25 + * values. Hence, this function simply drops @nomvsum / @len_window from + * given @mvsum and add @new_value. + * + * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values for + * the last window could be vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20. For + * calculating next moving sum with a new value, we should drop 0 from 50 and + * add the new value. However, this function assumes it got value 5 for each + * of the last ten times. Based on the assumption, when the next value is + * measured, it drops the assumed past value, 5 from the current sum, and add + * the new value to get the updated pseduo-moving average. + * + * This means the value could have errors, but the errors will be disappeared + * for every @len_window aligned calls. For example, if @len_window is 10, the + * pseudo moving sum with 11th value to 19th value would have an error. But + * the sum with 20th value will not have the error. + * + * Return: Pseudo-moving average after getting the @new_value. + */ +static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, + unsigned int len_window, unsigned int new_value) +{ + return mvsum - nomvsum / len_window + new_value; +} + +/** + * damon_update_region_access_rate() - Update the access rate of a region. + * @r: The DAMON region to update for its access check result. + * @accessed: Whether the region has accessed during last sampling interval. + * @attrs: The damon_attrs of the DAMON context. + * + * Update the access rate of a region with the region's last sampling interval + * access check result. + * + * Usually this will be called by &damon_operations->check_accesses callback. + */ +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs) +{ + unsigned int len_window = 1; + + /* + * sample_interval can be zero, but cannot be larger than + * aggr_interval, owing to validation of damon_set_attrs(). + */ + if (attrs->sample_interval) + len_window = damon_max_nr_accesses(attrs); + r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp, + r->last_nr_accesses * 10000, len_window, + accessed ? 10000 : 0); + + if (accessed) + r->nr_accesses++; +} + +/** + * damon_initialized() - Return if DAMON is ready to be used. + * + * Return: true if DAMON is ready to be used, false otherwise. + */ +bool damon_initialized(void) +{ + return damon_region_cache != NULL; +} + +static int __init damon_init(void) +{ + damon_region_cache = KMEM_CACHE(damon_region, 0); + if (unlikely(!damon_region_cache)) { + pr_err("creating damon_region_cache fails\n"); + return -ENOMEM; + } + + return 0; +} + +subsys_initcall(damon_init); + +#include "tests/core-kunit.h" diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c new file mode 100644 index 000000000000..49b4bc294f4e --- /dev/null +++ b/mm/damon/lru_sort.c @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON-based LRU-lists Sorting + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#define pr_fmt(fmt) "damon-lru-sort: " fmt + +#include <linux/damon.h> +#include <linux/kstrtox.h> +#include <linux/module.h> + +#include "modules-common.h" + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_lru_sort." + +/* + * Enable or disable DAMON_LRU_SORT. + * + * You can enable DAMON_LRU_SORT by setting the value of this parameter as + * ``Y``. Setting it as ``N`` disables DAMON_LRU_SORT. Note that + * DAMON_LRU_SORT could do no real monitoring and LRU-lists sorting due to the + * watermarks-based activation condition. Refer to below descriptions for the + * watermarks parameter for this. + */ +static bool enabled __read_mostly; + +/* + * Make DAMON_LRU_SORT reads the input parameters again, except ``enabled``. + * + * Input parameters that updated while DAMON_LRU_SORT is running are not + * applied by default. Once this parameter is set as ``Y``, DAMON_LRU_SORT + * reads values of parametrs except ``enabled`` again. Once the re-reading is + * done, this parameter is set as ``N``. If invalid parameters are found while + * the re-reading, DAMON_LRU_SORT will be disabled. + */ +static bool commit_inputs __read_mostly; +module_param(commit_inputs, bool, 0600); + +/* + * Access frequency threshold for hot memory regions identification in permil. + * + * If a memory region is accessed in frequency of this or higher, + * DAMON_LRU_SORT identifies the region as hot, and mark it as accessed on the + * LRU list, so that it could not be reclaimed under memory pressure. 50% by + * default. + */ +static unsigned long hot_thres_access_freq = 500; +module_param(hot_thres_access_freq, ulong, 0600); + +/* + * Time threshold for cold memory regions identification in microseconds. + * + * If a memory region is not accessed for this or longer time, DAMON_LRU_SORT + * identifies the region as cold, and mark it as unaccessed on the LRU list, so + * that it could be reclaimed first under memory pressure. 120 seconds by + * default. + */ +static unsigned long cold_min_age __read_mostly = 120000000; +module_param(cold_min_age, ulong, 0600); + +static struct damos_quota damon_lru_sort_quota = { + /* Use up to 10 ms per 1 sec, by default */ + .ms = 10, + .sz = 0, + .reset_interval = 1000, + /* Within the quota, mark hotter regions accessed first. */ + .weight_sz = 0, + .weight_nr_accesses = 1, + .weight_age = 0, +}; +DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(damon_lru_sort_quota); + +static struct damos_watermarks damon_lru_sort_wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = 5000000, /* 5 seconds */ + .high = 200, /* 20 percent */ + .mid = 150, /* 15 percent */ + .low = 50, /* 5 percent */ +}; +DEFINE_DAMON_MODULES_WMARKS_PARAMS(damon_lru_sort_wmarks); + +static struct damon_attrs damon_lru_sort_mon_attrs = { + .sample_interval = 5000, /* 5 ms */ + .aggr_interval = 100000, /* 100 ms */ + .ops_update_interval = 0, + .min_nr_regions = 10, + .max_nr_regions = 1000, +}; +DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(damon_lru_sort_mon_attrs); + +/* + * Start of the target memory region in physical address. + * + * The start physical address of memory region that DAMON_LRU_SORT will do work + * against. By default, biggest System RAM is used as the region. + */ +static unsigned long monitor_region_start __read_mostly; +module_param(monitor_region_start, ulong, 0600); + +/* + * End of the target memory region in physical address. + * + * The end physical address of memory region that DAMON_LRU_SORT will do work + * against. By default, biggest System RAM is used as the region. + */ +static unsigned long monitor_region_end __read_mostly; +module_param(monitor_region_end, ulong, 0600); + +/* + * Scale factor for DAMON_LRU_SORT to ops address conversion. + * + * This parameter must not be set to 0. + */ +static unsigned long addr_unit __read_mostly = 1; + +/* + * PID of the DAMON thread + * + * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread. + * Else, -1. + */ +static int kdamond_pid __read_mostly = -1; +module_param(kdamond_pid, int, 0400); + +static struct damos_stat damon_lru_sort_hot_stat; +DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_hot_stat, + lru_sort_tried_hot_regions, lru_sorted_hot_regions, + hot_quota_exceeds); + +static struct damos_stat damon_lru_sort_cold_stat; +DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_cold_stat, + lru_sort_tried_cold_regions, lru_sorted_cold_regions, + cold_quota_exceeds); + +static struct damos_access_pattern damon_lru_sort_stub_pattern = { + /* Find regions having PAGE_SIZE or larger size */ + .min_sz_region = PAGE_SIZE, + .max_sz_region = ULONG_MAX, + /* no matter its access frequency */ + .min_nr_accesses = 0, + .max_nr_accesses = UINT_MAX, + /* no matter its age */ + .min_age_region = 0, + .max_age_region = UINT_MAX, +}; + +static struct damon_ctx *ctx; +static struct damon_target *target; + +static struct damos *damon_lru_sort_new_scheme( + struct damos_access_pattern *pattern, enum damos_action action) +{ + struct damos_quota quota = damon_lru_sort_quota; + + /* Use half of total quota for hot/cold pages sorting */ + quota.ms = quota.ms / 2; + + return damon_new_scheme( + /* find the pattern, and */ + pattern, + /* (de)prioritize on LRU-lists */ + action, + /* for each aggregation interval */ + 0, + /* under the quota. */ + "a, + /* (De)activate this according to the watermarks. */ + &damon_lru_sort_wmarks, + NUMA_NO_NODE); +} + +/* Create a DAMON-based operation scheme for hot memory regions */ +static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres) +{ + struct damos_access_pattern pattern = damon_lru_sort_stub_pattern; + + pattern.min_nr_accesses = hot_thres; + return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_PRIO); +} + +/* Create a DAMON-based operation scheme for cold memory regions */ +static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) +{ + struct damos_access_pattern pattern = damon_lru_sort_stub_pattern; + + pattern.max_nr_accesses = 0; + pattern.min_age_region = cold_thres; + return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); +} + +static int damon_lru_sort_apply_parameters(void) +{ + struct damon_ctx *param_ctx; + struct damon_target *param_target; + struct damos *hot_scheme, *cold_scheme; + unsigned int hot_thres, cold_thres; + int err; + + err = damon_modules_new_paddr_ctx_target(¶m_ctx, ¶m_target); + if (err) + return err; + + /* + * If monitor_region_start/end are unset, always silently + * reset addr_unit to 1. + */ + if (!monitor_region_start && !monitor_region_end) + addr_unit = 1; + param_ctx->addr_unit = addr_unit; + param_ctx->min_sz_region = max(DAMON_MIN_REGION / addr_unit, 1); + + if (!damon_lru_sort_mon_attrs.sample_interval) { + err = -EINVAL; + goto out; + } + + err = damon_set_attrs(param_ctx, &damon_lru_sort_mon_attrs); + if (err) + goto out; + + err = -ENOMEM; + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * + hot_thres_access_freq / 1000; + hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); + if (!hot_scheme) + goto out; + + cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; + cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); + if (!cold_scheme) { + damon_destroy_scheme(hot_scheme); + goto out; + } + + damon_set_schemes(param_ctx, &hot_scheme, 1); + damon_add_scheme(param_ctx, cold_scheme); + + err = damon_set_region_biggest_system_ram_default(param_target, + &monitor_region_start, + &monitor_region_end, + param_ctx->min_sz_region); + if (err) + goto out; + err = damon_commit_ctx(ctx, param_ctx); +out: + damon_destroy_ctx(param_ctx); + return err; +} + +static int damon_lru_sort_handle_commit_inputs(void) +{ + int err; + + if (!commit_inputs) + return 0; + + err = damon_lru_sort_apply_parameters(); + commit_inputs = false; + return err; +} + +static int damon_lru_sort_damon_call_fn(void *arg) +{ + struct damon_ctx *c = arg; + struct damos *s; + + /* update the stats parameter */ + damon_for_each_scheme(s, c) { + if (s->action == DAMOS_LRU_PRIO) + damon_lru_sort_hot_stat = s->stat; + else if (s->action == DAMOS_LRU_DEPRIO) + damon_lru_sort_cold_stat = s->stat; + } + + return damon_lru_sort_handle_commit_inputs(); +} + +static struct damon_call_control call_control = { + .fn = damon_lru_sort_damon_call_fn, + .repeat = true, +}; + +static int damon_lru_sort_turn(bool on) +{ + int err; + + if (!on) { + err = damon_stop(&ctx, 1); + if (!err) + kdamond_pid = -1; + return err; + } + + err = damon_lru_sort_apply_parameters(); + if (err) + return err; + + err = damon_start(&ctx, 1, true); + if (err) + return err; + kdamond_pid = ctx->kdamond->pid; + return damon_call(ctx, &call_control); +} + +static int damon_lru_sort_addr_unit_store(const char *val, + const struct kernel_param *kp) +{ + unsigned long input_addr_unit; + int err = kstrtoul(val, 0, &input_addr_unit); + + if (err) + return err; + if (!input_addr_unit) + return -EINVAL; + + addr_unit = input_addr_unit; + return 0; +} + +static const struct kernel_param_ops addr_unit_param_ops = { + .set = damon_lru_sort_addr_unit_store, + .get = param_get_ulong, +}; + +module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600); +MODULE_PARM_DESC(addr_unit, + "Scale factor for DAMON_LRU_SORT to ops address conversion (default: 1)"); + +static int damon_lru_sort_enabled_store(const char *val, + const struct kernel_param *kp) +{ + bool is_enabled = enabled; + bool enable; + int err; + + err = kstrtobool(val, &enable); + if (err) + return err; + + if (is_enabled == enable) + return 0; + + /* Called before init function. The function will handle this. */ + if (!damon_initialized()) + goto set_param_out; + + err = damon_lru_sort_turn(enable); + if (err) + return err; + +set_param_out: + enabled = enable; + return err; +} + +static const struct kernel_param_ops enabled_param_ops = { + .set = damon_lru_sort_enabled_store, + .get = param_get_bool, +}; + +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, + "Enable or disable DAMON_LRU_SORT (default: disabled)"); + +static int __init damon_lru_sort_init(void) +{ + int err; + + if (!damon_initialized()) { + err = -ENOMEM; + goto out; + } + err = damon_modules_new_paddr_ctx_target(&ctx, &target); + if (err) + goto out; + + call_control.data = ctx; + + /* 'enabled' has set before this function, probably via command line */ + if (enabled) + err = damon_lru_sort_turn(true); + +out: + if (err && enabled) + enabled = false; + return err; +} + +module_init(damon_lru_sort_init); diff --git a/mm/damon/modules-common.c b/mm/damon/modules-common.c new file mode 100644 index 000000000000..86d58f8c4f63 --- /dev/null +++ b/mm/damon/modules-common.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Code for DAMON Modules + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#include <linux/damon.h> + +#include "modules-common.h" + +/* + * Allocate, set, and return a DAMON context for the physical address space. + * @ctxp: Pointer to save the point to the newly created context + * @targetp: Pointer to save the point to the newly created target + */ +int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp, + struct damon_target **targetp) +{ + struct damon_ctx *ctx; + struct damon_target *target; + + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + + if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + + target = damon_new_target(); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + + *ctxp = ctx; + *targetp = target; + return 0; +} diff --git a/mm/damon/modules-common.h b/mm/damon/modules-common.h new file mode 100644 index 000000000000..f103ad556368 --- /dev/null +++ b/mm/damon/modules-common.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Code for DAMON Modules + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#include <linux/moduleparam.h> + +#define DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(attrs) \ + module_param_named(sample_interval, attrs.sample_interval, \ + ulong, 0600); \ + module_param_named(aggr_interval, attrs.aggr_interval, ulong, \ + 0600); \ + module_param_named(min_nr_regions, attrs.min_nr_regions, ulong, \ + 0600); \ + module_param_named(max_nr_regions, attrs.max_nr_regions, ulong, \ + 0600); + +#define DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(quota) \ + module_param_named(quota_ms, quota.ms, ulong, 0600); \ + module_param_named(quota_reset_interval_ms, \ + quota.reset_interval, ulong, 0600); + +#define DEFINE_DAMON_MODULES_DAMOS_QUOTAS(quota) \ + DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(quota) \ + module_param_named(quota_sz, quota.sz, ulong, 0600); + +#define DEFINE_DAMON_MODULES_WMARKS_PARAMS(wmarks) \ + module_param_named(wmarks_interval, wmarks.interval, ulong, \ + 0600); \ + module_param_named(wmarks_high, wmarks.high, ulong, 0600); \ + module_param_named(wmarks_mid, wmarks.mid, ulong, 0600); \ + module_param_named(wmarks_low, wmarks.low, ulong, 0600); + +#define DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(stat, try_name, \ + succ_name, qt_exceed_name) \ + module_param_named(nr_##try_name, stat.nr_tried, ulong, 0400); \ + module_param_named(bytes_##try_name, stat.sz_tried, ulong, \ + 0400); \ + module_param_named(nr_##succ_name, stat.nr_applied, ulong, \ + 0400); \ + module_param_named(bytes_##succ_name, stat.sz_applied, ulong, \ + 0400); \ + module_param_named(nr_##qt_exceed_name, stat.qt_exceeds, ulong, \ + 0400); + +int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp, + struct damon_target **targetp); diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c new file mode 100644 index 000000000000..a218d9922234 --- /dev/null +++ b/mm/damon/ops-common.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Code for Data Access Monitoring + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#include <linux/migrate.h> +#include <linux/mmu_notifier.h> +#include <linux/page_idle.h> +#include <linux/pagemap.h> +#include <linux/rmap.h> +#include <linux/swap.h> +#include <linux/leafops.h> + +#include "../internal.h" +#include "ops-common.h" + +/* + * Get an online page for a pfn if it's in the LRU list. Otherwise, returns + * NULL. + * + * The body of this function is stolen from the 'page_idle_get_folio()'. We + * steal rather than reuse it because the code is quite simple. + */ +struct folio *damon_get_folio(unsigned long pfn) +{ + struct page *page = pfn_to_online_page(pfn); + struct folio *folio; + + if (!page) + return NULL; + + folio = page_folio(page); + if (!folio_test_lru(folio) || !folio_try_get(folio)) + return NULL; + if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) { + folio_put(folio); + folio = NULL; + } + return folio; +} + +void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr) +{ + pte_t pteval = ptep_get(pte); + struct folio *folio; + bool young = false; + unsigned long pfn; + + if (likely(pte_present(pteval))) + pfn = pte_pfn(pteval); + else + pfn = softleaf_to_pfn(softleaf_from_pte(pteval)); + + folio = damon_get_folio(pfn); + if (!folio) + return; + + /* + * PFN swap PTEs, such as device-exclusive ones, that actually map pages + * are "old" from a CPU perspective. The MMU notifier takes care of any + * device aspects. + */ + if (likely(pte_present(pteval))) + young |= ptep_test_and_clear_young(vma, addr, pte); + young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE); + if (young) + folio_set_young(folio); + + folio_set_idle(folio); + folio_put(folio); +} + +void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + pmd_t pmdval = pmdp_get(pmd); + struct folio *folio; + bool young = false; + unsigned long pfn; + + if (likely(pmd_present(pmdval))) + pfn = pmd_pfn(pmdval); + else + pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval)); + + folio = damon_get_folio(pfn); + if (!folio) + return; + + if (likely(pmd_present(pmdval))) + young |= pmdp_clear_young_notify(vma, addr, pmd); + young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE); + if (young) + folio_set_young(folio); + + folio_set_idle(folio); + folio_put(folio); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +} + +#define DAMON_MAX_SUBSCORE (100) +#define DAMON_MAX_AGE_IN_LOG (32) + +int damon_hot_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s) +{ + int freq_subscore; + unsigned int age_in_sec; + int age_in_log, age_subscore; + unsigned int freq_weight = s->quota.weight_nr_accesses; + unsigned int age_weight = s->quota.weight_age; + int hotness; + + freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / + damon_max_nr_accesses(&c->attrs); + + age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000; + for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec; + age_in_log++, age_in_sec >>= 1) + ; + + /* If frequency is 0, higher age means it's colder */ + if (freq_subscore == 0) + age_in_log *= -1; + + /* + * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG]. + * Scale it to be in [0, 100] and set it as age subscore. + */ + age_in_log += DAMON_MAX_AGE_IN_LOG; + age_subscore = age_in_log * DAMON_MAX_SUBSCORE / + DAMON_MAX_AGE_IN_LOG / 2; + + hotness = (freq_weight * freq_subscore + age_weight * age_subscore); + if (freq_weight + age_weight) + hotness /= freq_weight + age_weight; + /* + * Transform it to fit in [0, DAMOS_MAX_SCORE] + */ + hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE; + + return hotness; +} + +int damon_cold_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s) +{ + int hotness = damon_hot_score(c, r, s); + + /* Return coldness of the region */ + return DAMOS_MAX_SCORE - hotness; +} + +static bool damon_folio_mkold_one(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr, void *arg) +{ + DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); + + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) + damon_ptep_mkold(pvmw.pte, vma, addr); + else + damon_pmdp_mkold(pvmw.pmd, vma, addr); + } + return true; +} + +void damon_folio_mkold(struct folio *folio) +{ + struct rmap_walk_control rwc = { + .rmap_one = damon_folio_mkold_one, + .anon_lock = folio_lock_anon_vma_read, + }; + + if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { + folio_set_idle(folio); + return; + } + + if (!folio_trylock(folio)) + return; + + rmap_walk(folio, &rwc); + folio_unlock(folio); + +} + +static bool damon_folio_young_one(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr, void *arg) +{ + bool *accessed = arg; + DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0); + pte_t pte; + + *accessed = false; + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) { + pte = ptep_get(pvmw.pte); + + /* + * PFN swap PTEs, such as device-exclusive ones, that + * actually map pages are "old" from a CPU perspective. + * The MMU notifier takes care of any device aspects. + */ + *accessed = (pte_present(pte) && pte_young(pte)) || + !folio_test_idle(folio) || + mmu_notifier_test_young(vma->vm_mm, addr); + } else { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + pmd_t pmd = pmdp_get(pvmw.pmd); + + *accessed = (pmd_present(pmd) && pmd_young(pmd)) || + !folio_test_idle(folio) || + mmu_notifier_test_young(vma->vm_mm, addr); +#else + WARN_ON_ONCE(1); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + } + if (*accessed) { + page_vma_mapped_walk_done(&pvmw); + break; + } + } + + /* If accessed, stop walking */ + return *accessed == false; +} + +bool damon_folio_young(struct folio *folio) +{ + bool accessed = false; + struct rmap_walk_control rwc = { + .arg = &accessed, + .rmap_one = damon_folio_young_one, + .anon_lock = folio_lock_anon_vma_read, + }; + + if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { + if (folio_test_idle(folio)) + return false; + else + return true; + } + + if (!folio_trylock(folio)) + return false; + + rmap_walk(folio, &rwc); + folio_unlock(folio); + + return accessed; +} + +bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio) +{ + bool matched = false; + struct mem_cgroup *memcg; + size_t folio_sz; + + switch (filter->type) { + case DAMOS_FILTER_TYPE_ANON: + matched = folio_test_anon(folio); + break; + case DAMOS_FILTER_TYPE_ACTIVE: + matched = folio_test_active(folio); + break; + case DAMOS_FILTER_TYPE_MEMCG: + rcu_read_lock(); + memcg = folio_memcg_check(folio); + if (!memcg) + matched = false; + else + matched = filter->memcg_id == mem_cgroup_id(memcg); + rcu_read_unlock(); + break; + case DAMOS_FILTER_TYPE_YOUNG: + matched = damon_folio_young(folio); + if (matched) + damon_folio_mkold(folio); + break; + case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: + folio_sz = folio_size(folio); + matched = filter->sz_range.min <= folio_sz && + folio_sz <= filter->sz_range.max; + break; + case DAMOS_FILTER_TYPE_UNMAPPED: + matched = !folio_mapped(folio) || !folio_raw_mapping(folio); + break; + default: + break; + } + + return matched == filter->matching; +} + +static unsigned int __damon_migrate_folio_list( + struct list_head *migrate_folios, struct pglist_data *pgdat, + int target_nid) +{ + unsigned int nr_succeeded = 0; + struct migration_target_control mtc = { + /* + * Allocate from 'node', or fail quickly and quietly. + * When this happens, 'page' will likely just be discarded + * instead of migrated. + */ + .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | + __GFP_NOMEMALLOC | GFP_NOWAIT, + .nid = target_nid, + }; + + if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE) + return 0; + + if (list_empty(migrate_folios)) + return 0; + + /* Migration ignores all cpuset and mempolicy settings */ + migrate_pages(migrate_folios, alloc_migration_target, NULL, + (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON, + &nr_succeeded); + + return nr_succeeded; +} + +static unsigned int damon_migrate_folio_list(struct list_head *folio_list, + struct pglist_data *pgdat, + int target_nid) +{ + unsigned int nr_migrated = 0; + struct folio *folio; + LIST_HEAD(ret_folios); + LIST_HEAD(migrate_folios); + + while (!list_empty(folio_list)) { + struct folio *folio; + + cond_resched(); + + folio = lru_to_folio(folio_list); + list_del(&folio->lru); + + if (!folio_trylock(folio)) + goto keep; + + /* Relocate its contents to another node. */ + list_add(&folio->lru, &migrate_folios); + folio_unlock(folio); + continue; +keep: + list_add(&folio->lru, &ret_folios); + } + /* 'folio_list' is always empty here */ + + /* Migrate folios selected for migration */ + nr_migrated += __damon_migrate_folio_list( + &migrate_folios, pgdat, target_nid); + /* + * Folios that could not be migrated are still in @migrate_folios. Add + * those back on @folio_list + */ + if (!list_empty(&migrate_folios)) + list_splice_init(&migrate_folios, folio_list); + + try_to_unmap_flush(); + + list_splice(&ret_folios, folio_list); + + while (!list_empty(folio_list)) { + folio = lru_to_folio(folio_list); + list_del(&folio->lru); + folio_putback_lru(folio); + } + + return nr_migrated; +} + +unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid) +{ + int nid; + unsigned long nr_migrated = 0; + LIST_HEAD(node_folio_list); + unsigned int noreclaim_flag; + + if (list_empty(folio_list)) + return nr_migrated; + + if (target_nid < 0 || target_nid >= MAX_NUMNODES || + !node_state(target_nid, N_MEMORY)) + return nr_migrated; + + noreclaim_flag = memalloc_noreclaim_save(); + + nid = folio_nid(lru_to_folio(folio_list)); + do { + struct folio *folio = lru_to_folio(folio_list); + + if (nid == folio_nid(folio)) { + list_move(&folio->lru, &node_folio_list); + continue; + } + + nr_migrated += damon_migrate_folio_list(&node_folio_list, + NODE_DATA(nid), + target_nid); + nid = folio_nid(lru_to_folio(folio_list)); + } while (!list_empty(folio_list)); + + nr_migrated += damon_migrate_folio_list(&node_folio_list, + NODE_DATA(nid), + target_nid); + + memalloc_noreclaim_restore(noreclaim_flag); + + return nr_migrated; +} + +bool damos_ops_has_filter(struct damos *s) +{ + struct damos_filter *f; + + damos_for_each_ops_filter(f, s) + return true; + return false; +} diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h new file mode 100644 index 000000000000..5efa5b5970de --- /dev/null +++ b/mm/damon/ops-common.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Code for Data Access Monitoring + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#include <linux/damon.h> + +struct folio *damon_get_folio(unsigned long pfn); + +void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr); +void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr); +void damon_folio_mkold(struct folio *folio); +bool damon_folio_young(struct folio *folio); + +int damon_cold_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s); +int damon_hot_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s); + +bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio); +unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid); + +bool damos_ops_has_filter(struct damos *s); diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c new file mode 100644 index 000000000000..07a8aead439e --- /dev/null +++ b/mm/damon/paddr.c @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Code for The Physical Address Space + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#define pr_fmt(fmt) "damon-pa: " fmt + +#include <linux/mmu_notifier.h> +#include <linux/page_idle.h> +#include <linux/pagemap.h> +#include <linux/rmap.h> +#include <linux/swap.h> +#include <linux/memory-tiers.h> +#include <linux/mm_inline.h> + +#include "../internal.h" +#include "ops-common.h" + +static phys_addr_t damon_pa_phys_addr( + unsigned long addr, unsigned long addr_unit) +{ + return (phys_addr_t)addr * addr_unit; +} + +static unsigned long damon_pa_core_addr( + phys_addr_t pa, unsigned long addr_unit) +{ + /* + * Use div_u64() for avoiding linking errors related with __udivdi3, + * __aeabi_uldivmod, or similar problems. This should also improve the + * performance optimization (read div_u64() comment for the detail). + */ + if (sizeof(pa) == 8 && sizeof(addr_unit) == 4) + return div_u64(pa, addr_unit); + return pa / addr_unit; +} + +static void damon_pa_mkold(phys_addr_t paddr) +{ + struct folio *folio = damon_get_folio(PHYS_PFN(paddr)); + + if (!folio) + return; + + damon_folio_mkold(folio); + folio_put(folio); +} + +static void __damon_pa_prepare_access_check(struct damon_region *r, + unsigned long addr_unit) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_pa_mkold(damon_pa_phys_addr(r->sampling_addr, addr_unit)); +} + +static void damon_pa_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + __damon_pa_prepare_access_check(r, ctx->addr_unit); + } +} + +static bool damon_pa_young(phys_addr_t paddr, unsigned long *folio_sz) +{ + struct folio *folio = damon_get_folio(PHYS_PFN(paddr)); + bool accessed; + + if (!folio) + return false; + + accessed = damon_folio_young(folio); + *folio_sz = folio_size(folio); + folio_put(folio); + return accessed; +} + +static void __damon_pa_check_access(struct damon_region *r, + struct damon_attrs *attrs, unsigned long addr_unit) +{ + static phys_addr_t last_addr; + static unsigned long last_folio_sz = PAGE_SIZE; + static bool last_accessed; + phys_addr_t sampling_addr = damon_pa_phys_addr( + r->sampling_addr, addr_unit); + + /* If the region is in the last checked page, reuse the result */ + if (ALIGN_DOWN(last_addr, last_folio_sz) == + ALIGN_DOWN(sampling_addr, last_folio_sz)) { + damon_update_region_access_rate(r, last_accessed, attrs); + return; + } + + last_accessed = damon_pa_young(sampling_addr, &last_folio_sz); + damon_update_region_access_rate(r, last_accessed, attrs); + + last_addr = sampling_addr; +} + +static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) { + __damon_pa_check_access( + r, &ctx->attrs, ctx->addr_unit); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + } + } + + return max_nr_accesses; +} + +/* + * damos_pa_filter_out - Return true if the page should be filtered out. + */ +static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio) +{ + struct damos_filter *filter; + + if (scheme->core_filters_allowed) + return false; + + damos_for_each_ops_filter(filter, scheme) { + if (damos_folio_filter_match(filter, folio)) + return !filter->allow; + } + return scheme->ops_filters_default_reject; +} + +static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s) +{ + if (!folio) + return true; + if (folio == s->last_applied) { + folio_put(folio); + return true; + } + return false; +} + +static unsigned long damon_pa_pageout(struct damon_region *r, + unsigned long addr_unit, struct damos *s, + unsigned long *sz_filter_passed) +{ + phys_addr_t addr, applied; + LIST_HEAD(folio_list); + bool install_young_filter = true; + struct damos_filter *filter; + struct folio *folio; + + /* check access in page level again by default */ + damos_for_each_ops_filter(filter, s) { + if (filter->type == DAMOS_FILTER_TYPE_YOUNG) { + install_young_filter = false; + break; + } + } + if (install_young_filter) { + filter = damos_new_filter( + DAMOS_FILTER_TYPE_YOUNG, true, false); + if (!filter) + return 0; + damos_add_filter(s, filter); + } + + addr = damon_pa_phys_addr(r->ar.start, addr_unit); + while (addr < damon_pa_phys_addr(r->ar.end, addr_unit)) { + folio = damon_get_folio(PHYS_PFN(addr)); + if (damon_pa_invalid_damos_folio(folio, s)) { + addr += PAGE_SIZE; + continue; + } + + if (damos_pa_filter_out(s, folio)) + goto put_folio; + else + *sz_filter_passed += folio_size(folio) / addr_unit; + + folio_clear_referenced(folio); + folio_test_clear_young(folio); + if (!folio_isolate_lru(folio)) + goto put_folio; + if (folio_test_unevictable(folio)) + folio_putback_lru(folio); + else + list_add(&folio->lru, &folio_list); +put_folio: + addr += folio_size(folio); + folio_put(folio); + } + if (install_young_filter) + damos_destroy_filter(filter); + applied = reclaim_pages(&folio_list); + cond_resched(); + s->last_applied = folio; + return damon_pa_core_addr(applied * PAGE_SIZE, addr_unit); +} + +static inline unsigned long damon_pa_mark_accessed_or_deactivate( + struct damon_region *r, unsigned long addr_unit, + struct damos *s, bool mark_accessed, + unsigned long *sz_filter_passed) +{ + phys_addr_t addr, applied = 0; + struct folio *folio; + + addr = damon_pa_phys_addr(r->ar.start, addr_unit); + while (addr < damon_pa_phys_addr(r->ar.end, addr_unit)) { + folio = damon_get_folio(PHYS_PFN(addr)); + if (damon_pa_invalid_damos_folio(folio, s)) { + addr += PAGE_SIZE; + continue; + } + + if (damos_pa_filter_out(s, folio)) + goto put_folio; + else + *sz_filter_passed += folio_size(folio) / addr_unit; + + if (mark_accessed) + folio_mark_accessed(folio); + else + folio_deactivate(folio); + applied += folio_nr_pages(folio); +put_folio: + addr += folio_size(folio); + folio_put(folio); + } + s->last_applied = folio; + return damon_pa_core_addr(applied * PAGE_SIZE, addr_unit); +} + +static unsigned long damon_pa_mark_accessed(struct damon_region *r, + unsigned long addr_unit, struct damos *s, + unsigned long *sz_filter_passed) +{ + return damon_pa_mark_accessed_or_deactivate(r, addr_unit, s, true, + sz_filter_passed); +} + +static unsigned long damon_pa_deactivate_pages(struct damon_region *r, + unsigned long addr_unit, struct damos *s, + unsigned long *sz_filter_passed) +{ + return damon_pa_mark_accessed_or_deactivate(r, addr_unit, s, false, + sz_filter_passed); +} + +static unsigned long damon_pa_migrate(struct damon_region *r, + unsigned long addr_unit, struct damos *s, + unsigned long *sz_filter_passed) +{ + phys_addr_t addr, applied; + LIST_HEAD(folio_list); + struct folio *folio; + + addr = damon_pa_phys_addr(r->ar.start, addr_unit); + while (addr < damon_pa_phys_addr(r->ar.end, addr_unit)) { + folio = damon_get_folio(PHYS_PFN(addr)); + if (damon_pa_invalid_damos_folio(folio, s)) { + addr += PAGE_SIZE; + continue; + } + + if (damos_pa_filter_out(s, folio)) + goto put_folio; + else + *sz_filter_passed += folio_size(folio) / addr_unit; + + if (!folio_isolate_lru(folio)) + goto put_folio; + list_add(&folio->lru, &folio_list); +put_folio: + addr += folio_size(folio); + folio_put(folio); + } + applied = damon_migrate_pages(&folio_list, s->target_nid); + cond_resched(); + s->last_applied = folio; + return damon_pa_core_addr(applied * PAGE_SIZE, addr_unit); +} + +static unsigned long damon_pa_stat(struct damon_region *r, + unsigned long addr_unit, struct damos *s, + unsigned long *sz_filter_passed) +{ + phys_addr_t addr; + struct folio *folio; + + if (!damos_ops_has_filter(s)) + return 0; + + addr = damon_pa_phys_addr(r->ar.start, addr_unit); + while (addr < damon_pa_phys_addr(r->ar.end, addr_unit)) { + folio = damon_get_folio(PHYS_PFN(addr)); + if (damon_pa_invalid_damos_folio(folio, s)) { + addr += PAGE_SIZE; + continue; + } + + if (!damos_pa_filter_out(s, folio)) + *sz_filter_passed += folio_size(folio) / addr_unit; + addr += folio_size(folio); + folio_put(folio); + } + s->last_applied = folio; + return 0; +} + +static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *scheme, unsigned long *sz_filter_passed) +{ + unsigned long aunit = ctx->addr_unit; + + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_pa_pageout(r, aunit, scheme, sz_filter_passed); + case DAMOS_LRU_PRIO: + return damon_pa_mark_accessed(r, aunit, scheme, + sz_filter_passed); + case DAMOS_LRU_DEPRIO: + return damon_pa_deactivate_pages(r, aunit, scheme, + sz_filter_passed); + case DAMOS_MIGRATE_HOT: + case DAMOS_MIGRATE_COLD: + return damon_pa_migrate(r, aunit, scheme, sz_filter_passed); + case DAMOS_STAT: + return damon_pa_stat(r, aunit, scheme, sz_filter_passed); + default: + /* DAMOS actions that not yet supported by 'paddr'. */ + break; + } + return 0; +} + +static int damon_pa_scheme_score(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_cold_score(context, r, scheme); + case DAMOS_LRU_PRIO: + return damon_hot_score(context, r, scheme); + case DAMOS_LRU_DEPRIO: + return damon_cold_score(context, r, scheme); + case DAMOS_MIGRATE_HOT: + return damon_hot_score(context, r, scheme); + case DAMOS_MIGRATE_COLD: + return damon_cold_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + +static int __init damon_pa_initcall(void) +{ + struct damon_operations ops = { + .id = DAMON_OPS_PADDR, + .init = NULL, + .update = NULL, + .prepare_access_checks = damon_pa_prepare_access_checks, + .check_accesses = damon_pa_check_accesses, + .target_valid = NULL, + .cleanup = NULL, + .apply_scheme = damon_pa_apply_scheme, + .get_scheme_score = damon_pa_scheme_score, + }; + + return damon_register_ops(&ops); +}; + +subsys_initcall(damon_pa_initcall); diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c new file mode 100644 index 000000000000..36a582e09eae --- /dev/null +++ b/mm/damon/reclaim.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON-based page reclamation + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#define pr_fmt(fmt) "damon-reclaim: " fmt + +#include <linux/damon.h> +#include <linux/kstrtox.h> +#include <linux/module.h> + +#include "modules-common.h" + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_reclaim." + +/* + * Enable or disable DAMON_RECLAIM. + * + * You can enable DAMON_RCLAIM by setting the value of this parameter as ``Y``. + * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could + * do no real monitoring and reclamation due to the watermarks-based activation + * condition. Refer to below descriptions for the watermarks parameter for + * this. + */ +static bool enabled __read_mostly; + +/* + * Make DAMON_RECLAIM reads the input parameters again, except ``enabled``. + * + * Input parameters that updated while DAMON_RECLAIM is running are not applied + * by default. Once this parameter is set as ``Y``, DAMON_RECLAIM reads values + * of parametrs except ``enabled`` again. Once the re-reading is done, this + * parameter is set as ``N``. If invalid parameters are found while the + * re-reading, DAMON_RECLAIM will be disabled. + */ +static bool commit_inputs __read_mostly; +module_param(commit_inputs, bool, 0600); + +/* + * Time threshold for cold memory regions identification in microseconds. + * + * If a memory region is not accessed for this or longer time, DAMON_RECLAIM + * identifies the region as cold, and reclaims. 120 seconds by default. + */ +static unsigned long min_age __read_mostly = 120000000; +module_param(min_age, ulong, 0600); + +static struct damos_quota damon_reclaim_quota = { + /* use up to 10 ms time, reclaim up to 128 MiB per 1 sec by default */ + .ms = 10, + .sz = 128 * 1024 * 1024, + .reset_interval = 1000, + /* Within the quota, page out older regions first. */ + .weight_sz = 0, + .weight_nr_accesses = 0, + .weight_age = 1 +}; +DEFINE_DAMON_MODULES_DAMOS_QUOTAS(damon_reclaim_quota); + +/* + * Desired level of memory pressure-stall time in microseconds. + * + * While keeping the caps that set by other quotas, DAMON_RECLAIM automatically + * increases and decreases the effective level of the quota aiming this level of + * memory pressure is incurred. System-wide ``some`` memory PSI in microseconds + * per quota reset interval (``quota_reset_interval_ms``) is collected and + * compared to this value to see if the aim is satisfied. Value zero means + * disabling this auto-tuning feature. + * + * Disabled by default. + */ +static unsigned long quota_mem_pressure_us __read_mostly; +module_param(quota_mem_pressure_us, ulong, 0600); + +/* + * User-specifiable feedback for auto-tuning of the effective quota. + * + * While keeping the caps that set by other quotas, DAMON_RECLAIM automatically + * increases and decreases the effective level of the quota aiming receiving this + * feedback of value ``10,000`` from the user. DAMON_RECLAIM assumes the feedback + * value and the quota are positively proportional. Value zero means disabling + * this auto-tuning feature. + * + * Disabled by default. + * + */ +static unsigned long quota_autotune_feedback __read_mostly; +module_param(quota_autotune_feedback, ulong, 0600); + +static struct damos_watermarks damon_reclaim_wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = 5000000, /* 5 seconds */ + .high = 500, /* 50 percent */ + .mid = 400, /* 40 percent */ + .low = 200, /* 20 percent */ +}; +DEFINE_DAMON_MODULES_WMARKS_PARAMS(damon_reclaim_wmarks); + +static struct damon_attrs damon_reclaim_mon_attrs = { + .sample_interval = 5000, /* 5 ms */ + .aggr_interval = 100000, /* 100 ms */ + .ops_update_interval = 0, + .min_nr_regions = 10, + .max_nr_regions = 1000, +}; +DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(damon_reclaim_mon_attrs); + +/* + * Start of the target memory region in physical address. + * + * The start physical address of memory region that DAMON_RECLAIM will do work + * against. By default, biggest System RAM is used as the region. + */ +static unsigned long monitor_region_start __read_mostly; +module_param(monitor_region_start, ulong, 0600); + +/* + * End of the target memory region in physical address. + * + * The end physical address of memory region that DAMON_RECLAIM will do work + * against. By default, biggest System RAM is used as the region. + */ +static unsigned long monitor_region_end __read_mostly; +module_param(monitor_region_end, ulong, 0600); + +/* + * Scale factor for DAMON_RECLAIM to ops address conversion. + * + * This parameter must not be set to 0. + */ +static unsigned long addr_unit __read_mostly = 1; + +/* + * Skip anonymous pages reclamation. + * + * If this parameter is set as ``Y``, DAMON_RECLAIM does not reclaim anonymous + * pages. By default, ``N``. + */ +static bool skip_anon __read_mostly; +module_param(skip_anon, bool, 0600); + +/* + * PID of the DAMON thread + * + * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. + * Else, -1. + */ +static int kdamond_pid __read_mostly = -1; +module_param(kdamond_pid, int, 0400); + +static struct damos_stat damon_reclaim_stat; +DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_reclaim_stat, + reclaim_tried_regions, reclaimed_regions, quota_exceeds); + +static struct damon_ctx *ctx; +static struct damon_target *target; + +static struct damos *damon_reclaim_new_scheme(void) +{ + struct damos_access_pattern pattern = { + /* Find regions having PAGE_SIZE or larger size */ + .min_sz_region = PAGE_SIZE, + .max_sz_region = ULONG_MAX, + /* and not accessed at all */ + .min_nr_accesses = 0, + .max_nr_accesses = 0, + /* for min_age or more micro-seconds */ + .min_age_region = min_age / + damon_reclaim_mon_attrs.aggr_interval, + .max_age_region = UINT_MAX, + }; + + return damon_new_scheme( + &pattern, + /* page out those, as soon as found */ + DAMOS_PAGEOUT, + /* for each aggregation interval */ + 0, + /* under the quota. */ + &damon_reclaim_quota, + /* (De)activate this according to the watermarks. */ + &damon_reclaim_wmarks, + NUMA_NO_NODE); +} + +static int damon_reclaim_apply_parameters(void) +{ + struct damon_ctx *param_ctx; + struct damon_target *param_target; + struct damos *scheme; + struct damos_quota_goal *goal; + struct damos_filter *filter; + int err; + + err = damon_modules_new_paddr_ctx_target(¶m_ctx, ¶m_target); + if (err) + return err; + + /* + * If monitor_region_start/end are unset, always silently + * reset addr_unit to 1. + */ + if (!monitor_region_start && !monitor_region_end) + addr_unit = 1; + param_ctx->addr_unit = addr_unit; + param_ctx->min_sz_region = max(DAMON_MIN_REGION / addr_unit, 1); + + if (!damon_reclaim_mon_attrs.aggr_interval) { + err = -EINVAL; + goto out; + } + + err = damon_set_attrs(param_ctx, &damon_reclaim_mon_attrs); + if (err) + goto out; + + err = -ENOMEM; + scheme = damon_reclaim_new_scheme(); + if (!scheme) + goto out; + damon_set_schemes(param_ctx, &scheme, 1); + + if (quota_mem_pressure_us) { + goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US, + quota_mem_pressure_us); + if (!goal) + goto out; + damos_add_quota_goal(&scheme->quota, goal); + } + + if (quota_autotune_feedback) { + goal = damos_new_quota_goal(DAMOS_QUOTA_USER_INPUT, 10000); + if (!goal) + goto out; + goal->current_value = quota_autotune_feedback; + damos_add_quota_goal(&scheme->quota, goal); + } + + if (skip_anon) { + filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true, false); + if (!filter) + goto out; + damos_add_filter(scheme, filter); + } + + err = damon_set_region_biggest_system_ram_default(param_target, + &monitor_region_start, + &monitor_region_end, + param_ctx->min_sz_region); + if (err) + goto out; + err = damon_commit_ctx(ctx, param_ctx); +out: + damon_destroy_ctx(param_ctx); + return err; +} + +static int damon_reclaim_handle_commit_inputs(void) +{ + int err; + + if (!commit_inputs) + return 0; + + err = damon_reclaim_apply_parameters(); + commit_inputs = false; + return err; +} + +static int damon_reclaim_damon_call_fn(void *arg) +{ + struct damon_ctx *c = arg; + struct damos *s; + + /* update the stats parameter */ + damon_for_each_scheme(s, c) + damon_reclaim_stat = s->stat; + + return damon_reclaim_handle_commit_inputs(); +} + +static struct damon_call_control call_control = { + .fn = damon_reclaim_damon_call_fn, + .repeat = true, +}; + +static int damon_reclaim_turn(bool on) +{ + int err; + + if (!on) { + err = damon_stop(&ctx, 1); + if (!err) + kdamond_pid = -1; + return err; + } + + err = damon_reclaim_apply_parameters(); + if (err) + return err; + + err = damon_start(&ctx, 1, true); + if (err) + return err; + kdamond_pid = ctx->kdamond->pid; + return damon_call(ctx, &call_control); +} + +static int damon_reclaim_addr_unit_store(const char *val, + const struct kernel_param *kp) +{ + unsigned long input_addr_unit; + int err = kstrtoul(val, 0, &input_addr_unit); + + if (err) + return err; + if (!input_addr_unit) + return -EINVAL; + + addr_unit = input_addr_unit; + return 0; +} + +static const struct kernel_param_ops addr_unit_param_ops = { + .set = damon_reclaim_addr_unit_store, + .get = param_get_ulong, +}; + +module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600); +MODULE_PARM_DESC(addr_unit, + "Scale factor for DAMON_RECLAIM to ops address conversion (default: 1)"); + +static int damon_reclaim_enabled_store(const char *val, + const struct kernel_param *kp) +{ + bool is_enabled = enabled; + bool enable; + int err; + + err = kstrtobool(val, &enable); + if (err) + return err; + + if (is_enabled == enable) + return 0; + + /* Called before init function. The function will handle this. */ + if (!damon_initialized()) + goto set_param_out; + + err = damon_reclaim_turn(enable); + if (err) + return err; + +set_param_out: + enabled = enable; + return err; +} + +static const struct kernel_param_ops enabled_param_ops = { + .set = damon_reclaim_enabled_store, + .get = param_get_bool, +}; + +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, + "Enable or disable DAMON_RECLAIM (default: disabled)"); + +static int __init damon_reclaim_init(void) +{ + int err; + + if (!damon_initialized()) { + err = -ENOMEM; + goto out; + } + err = damon_modules_new_paddr_ctx_target(&ctx, &target); + if (err) + goto out; + + call_control.data = ctx; + + /* 'enabled' has set before this function, probably via command line */ + if (enabled) + err = damon_reclaim_turn(true); + +out: + if (err && enabled) + enabled = false; + return err; +} + +module_init(damon_reclaim_init); diff --git a/mm/damon/stat.c b/mm/damon/stat.c new file mode 100644 index 000000000000..ed8e3629d31a --- /dev/null +++ b/mm/damon/stat.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shows data access monitoring resutls in simple metrics. + */ + +#define pr_fmt(fmt) "damon-stat: " fmt + +#include <linux/damon.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sort.h> + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_stat." + +static int damon_stat_enabled_store( + const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops enabled_param_ops = { + .set = damon_stat_enabled_store, + .get = param_get_bool, +}; + +static bool enabled __read_mostly = IS_ENABLED( + CONFIG_DAMON_STAT_ENABLED_DEFAULT); +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, "Enable of disable DAMON_STAT"); + +static unsigned long estimated_memory_bandwidth __read_mostly; +module_param(estimated_memory_bandwidth, ulong, 0400); +MODULE_PARM_DESC(estimated_memory_bandwidth, + "Estimated memory bandwidth usage in bytes per second"); + +static long memory_idle_ms_percentiles[101] __read_mostly = {0,}; +module_param_array(memory_idle_ms_percentiles, long, NULL, 0400); +MODULE_PARM_DESC(memory_idle_ms_percentiles, + "Memory idle time percentiles in milliseconds"); + +static unsigned long aggr_interval_us; +module_param(aggr_interval_us, ulong, 0400); +MODULE_PARM_DESC(aggr_interval_us, + "Current tuned aggregation interval in microseconds"); + +static struct damon_ctx *damon_stat_context; + +static unsigned long damon_stat_last_refresh_jiffies; + +static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long access_bytes = 0; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) + access_bytes += (r->ar.end - r->ar.start) * + r->nr_accesses; + } + estimated_memory_bandwidth = access_bytes * USEC_PER_MSEC * + MSEC_PER_SEC / c->attrs.aggr_interval; +} + +static int damon_stat_idletime(const struct damon_region *r) +{ + if (r->nr_accesses) + return -1 * (r->age + 1); + return r->age + 1; +} + +static int damon_stat_cmp_regions(const void *a, const void *b) +{ + const struct damon_region *ra = *(const struct damon_region **)a; + const struct damon_region *rb = *(const struct damon_region **)b; + + return damon_stat_idletime(ra) - damon_stat_idletime(rb); +} + +static int damon_stat_sort_regions(struct damon_ctx *c, + struct damon_region ***sorted_ptr, int *nr_regions_ptr, + unsigned long *total_sz_ptr) +{ + struct damon_target *t; + struct damon_region *r; + struct damon_region **region_pointers; + unsigned int nr_regions = 0; + unsigned long total_sz = 0; + + damon_for_each_target(t, c) { + /* there is only one target */ + region_pointers = kmalloc_array(damon_nr_regions(t), + sizeof(*region_pointers), GFP_KERNEL); + if (!region_pointers) + return -ENOMEM; + damon_for_each_region(r, t) { + region_pointers[nr_regions++] = r; + total_sz += r->ar.end - r->ar.start; + } + } + sort(region_pointers, nr_regions, sizeof(*region_pointers), + damon_stat_cmp_regions, NULL); + *sorted_ptr = region_pointers; + *nr_regions_ptr = nr_regions; + *total_sz_ptr = total_sz; + return 0; +} + +static void damon_stat_set_idletime_percentiles(struct damon_ctx *c) +{ + struct damon_region **sorted_regions, *region; + int nr_regions; + unsigned long total_sz, accounted_bytes = 0; + int err, i, next_percentile = 0; + + err = damon_stat_sort_regions(c, &sorted_regions, &nr_regions, + &total_sz); + if (err) + return; + for (i = 0; i < nr_regions; i++) { + region = sorted_regions[i]; + accounted_bytes += region->ar.end - region->ar.start; + while (next_percentile <= accounted_bytes * 100 / total_sz) + memory_idle_ms_percentiles[next_percentile++] = + damon_stat_idletime(region) * + (long)c->attrs.aggr_interval / USEC_PER_MSEC; + } + kfree(sorted_regions); +} + +static int damon_stat_damon_call_fn(void *data) +{ + struct damon_ctx *c = data; + + /* avoid unnecessarily frequent stat update */ + if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies + + msecs_to_jiffies(5 * MSEC_PER_SEC))) + return 0; + damon_stat_last_refresh_jiffies = jiffies; + + aggr_interval_us = c->attrs.aggr_interval; + damon_stat_set_estimated_memory_bandwidth(c); + damon_stat_set_idletime_percentiles(c); + return 0; +} + +static struct damon_ctx *damon_stat_build_ctx(void) +{ + struct damon_ctx *ctx; + struct damon_attrs attrs; + struct damon_target *target; + unsigned long start = 0, end = 0; + + ctx = damon_new_ctx(); + if (!ctx) + return NULL; + attrs = (struct damon_attrs) { + .sample_interval = 5 * USEC_PER_MSEC, + .aggr_interval = 100 * USEC_PER_MSEC, + .ops_update_interval = 60 * USEC_PER_MSEC * MSEC_PER_SEC, + .min_nr_regions = 10, + .max_nr_regions = 1000, + }; + /* + * auto-tune sampling and aggregation interval aiming 4% DAMON-observed + * accesses ratio, keeping sampling interval in [5ms, 10s] range. + */ + attrs.intervals_goal = (struct damon_intervals_goal) { + .access_bp = 400, .aggrs = 3, + .min_sample_us = 5000, .max_sample_us = 10000000, + }; + if (damon_set_attrs(ctx, &attrs)) + goto free_out; + + /* + * auto-tune sampling and aggregation interval aiming 4% DAMON-observed + * accesses ratio, keeping sampling interval in [5ms, 10s] range. + */ + ctx->attrs.intervals_goal = (struct damon_intervals_goal) { + .access_bp = 400, .aggrs = 3, + .min_sample_us = 5000, .max_sample_us = 10000000, + }; + if (damon_select_ops(ctx, DAMON_OPS_PADDR)) + goto free_out; + + target = damon_new_target(); + if (!target) + goto free_out; + damon_add_target(ctx, target); + if (damon_set_region_biggest_system_ram_default(target, &start, &end, + ctx->min_sz_region)) + goto free_out; + return ctx; +free_out: + damon_destroy_ctx(ctx); + return NULL; +} + +static struct damon_call_control call_control = { + .fn = damon_stat_damon_call_fn, + .repeat = true, +}; + +static int damon_stat_start(void) +{ + int err; + + damon_stat_context = damon_stat_build_ctx(); + if (!damon_stat_context) + return -ENOMEM; + err = damon_start(&damon_stat_context, 1, true); + if (err) + return err; + + damon_stat_last_refresh_jiffies = jiffies; + call_control.data = damon_stat_context; + return damon_call(damon_stat_context, &call_control); +} + +static void damon_stat_stop(void) +{ + damon_stop(&damon_stat_context, 1); + damon_destroy_ctx(damon_stat_context); +} + +static int damon_stat_enabled_store( + const char *val, const struct kernel_param *kp) +{ + bool is_enabled = enabled; + int err; + + err = kstrtobool(val, &enabled); + if (err) + return err; + + if (is_enabled == enabled) + return 0; + + if (!damon_initialized()) + /* + * probably called from command line parsing (parse_args()). + * Cannot call damon_new_ctx(). Let damon_stat_init() handle. + */ + return 0; + + if (enabled) { + err = damon_stat_start(); + if (err) + enabled = false; + return err; + } + damon_stat_stop(); + return 0; +} + +static int __init damon_stat_init(void) +{ + int err = 0; + + if (!damon_initialized()) { + err = -ENOMEM; + goto out; + } + + /* probably set via command line */ + if (enabled) + err = damon_stat_start(); + +out: + if (err && enabled) + enabled = false; + return err; +} + +module_init(damon_stat_init); diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c new file mode 100644 index 000000000000..ffaf285e241a --- /dev/null +++ b/mm/damon/sysfs-common.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Code for DAMON Sysfs Interface + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#include <linux/slab.h> + +#include "sysfs-common.h" + +DEFINE_MUTEX(damon_sysfs_lock); + +/* + * unsigned long range directory + */ + +struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( + unsigned long min, + unsigned long max) +{ + struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range), + GFP_KERNEL); + + if (!range) + return NULL; + range->kobj = (struct kobject){}; + range->min = min; + range->max = max; + + return range; +} + +static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + + return sysfs_emit(buf, "%lu\n", range->min); +} + +static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + unsigned long min; + int err; + + err = kstrtoul(buf, 0, &min); + if (err) + return err; + + range->min = min; + return count; +} + +static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + + return sysfs_emit(buf, "%lu\n", range->max); +} + +static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_ul_range *range = container_of(kobj, + struct damon_sysfs_ul_range, kobj); + unsigned long max; + int err; + + err = kstrtoul(buf, 0, &max); + if (err) + return err; + + range->max = max; + return count; +} + +void damon_sysfs_ul_range_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj)); +} + +static struct kobj_attribute damon_sysfs_ul_range_min_attr = + __ATTR_RW_MODE(min, 0600); + +static struct kobj_attribute damon_sysfs_ul_range_max_attr = + __ATTR_RW_MODE(max, 0600); + +static struct attribute *damon_sysfs_ul_range_attrs[] = { + &damon_sysfs_ul_range_min_attr.attr, + &damon_sysfs_ul_range_max_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_ul_range); + +const struct kobj_type damon_sysfs_ul_range_ktype = { + .release = damon_sysfs_ul_range_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_ul_range_groups, +}; + diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h new file mode 100644 index 000000000000..2099adee11d0 --- /dev/null +++ b/mm/damon/sysfs-common.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Code for DAMON Sysfs Interface + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#include <linux/damon.h> +#include <linux/kobject.h> + +extern struct mutex damon_sysfs_lock; + +struct damon_sysfs_ul_range { + struct kobject kobj; + unsigned long min; + unsigned long max; +}; + +struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc( + unsigned long min, + unsigned long max); +void damon_sysfs_ul_range_release(struct kobject *kobj); + +extern const struct kobj_type damon_sysfs_ul_range_ktype; + +/* + * schemes directory + */ + +struct damon_sysfs_schemes { + struct kobject kobj; + struct damon_sysfs_scheme **schemes_arr; + int nr; +}; + +struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void); +void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes); + +extern const struct kobj_type damon_sysfs_schemes_ktype; + +int damon_sysfs_add_schemes(struct damon_ctx *ctx, + struct damon_sysfs_schemes *sysfs_schemes); + +void damon_sysfs_schemes_update_stats( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx); + +void damos_sysfs_populate_region_dir(struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos *s, + bool total_bytes_only, unsigned long sz_filter_passed); + +int damon_sysfs_schemes_clear_regions( + struct damon_sysfs_schemes *sysfs_schemes); + +int damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx); + +void damos_sysfs_update_effective_quotas( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx); diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c new file mode 100644 index 000000000000..30d20f5b3192 --- /dev/null +++ b/mm/damon/sysfs-schemes.c @@ -0,0 +1,2832 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON sysfs Interface + * + * Copyright (c) 2022 SeongJae Park <sj@kernel.org> + */ + +#include <linux/slab.h> +#include <linux/numa.h> + +#include "sysfs-common.h" + +/* + * scheme region directory + */ + +struct damon_sysfs_scheme_region { + struct kobject kobj; + struct damon_addr_range ar; + unsigned int nr_accesses; + unsigned int age; + unsigned long sz_filter_passed; + struct list_head list; +}; + +static struct damon_sysfs_scheme_region *damon_sysfs_scheme_region_alloc( + struct damon_region *region) +{ + struct damon_sysfs_scheme_region *sysfs_region = kmalloc( + sizeof(*sysfs_region), GFP_KERNEL); + + if (!sysfs_region) + return NULL; + sysfs_region->kobj = (struct kobject){}; + sysfs_region->ar = region->ar; + sysfs_region->nr_accesses = region->nr_accesses_bp / 10000; + sysfs_region->age = region->age; + INIT_LIST_HEAD(&sysfs_region->list); + return sysfs_region; +} + +static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->ar.start); +} + +static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->ar.end); +} + +static ssize_t nr_accesses_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%u\n", region->nr_accesses); +} + +static ssize_t age_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%u\n", region->age); +} + +static ssize_t sz_filter_passed_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->sz_filter_passed); +} + +static void damon_sysfs_scheme_region_release(struct kobject *kobj) +{ + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + + list_del(®ion->list); + kfree(region); +} + +static struct kobj_attribute damon_sysfs_scheme_region_start_attr = + __ATTR_RO_MODE(start, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_end_attr = + __ATTR_RO_MODE(end, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_nr_accesses_attr = + __ATTR_RO_MODE(nr_accesses, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_age_attr = + __ATTR_RO_MODE(age, 0400); + +static struct kobj_attribute damon_sysfs_scheme_region_sz_filter_passed_attr = + __ATTR_RO_MODE(sz_filter_passed, 0400); + +static struct attribute *damon_sysfs_scheme_region_attrs[] = { + &damon_sysfs_scheme_region_start_attr.attr, + &damon_sysfs_scheme_region_end_attr.attr, + &damon_sysfs_scheme_region_nr_accesses_attr.attr, + &damon_sysfs_scheme_region_age_attr.attr, + &damon_sysfs_scheme_region_sz_filter_passed_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme_region); + +static const struct kobj_type damon_sysfs_scheme_region_ktype = { + .release = damon_sysfs_scheme_region_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_region_groups, +}; + +/* + * scheme regions directory + */ + +struct damon_sysfs_scheme_regions { + struct kobject kobj; + struct list_head regions_list; + int nr_regions; + unsigned long total_bytes; +}; + +static struct damon_sysfs_scheme_regions * +damon_sysfs_scheme_regions_alloc(void) +{ + struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions), + GFP_KERNEL); + + if (!regions) + return NULL; + + regions->kobj = (struct kobject){}; + INIT_LIST_HEAD(®ions->regions_list); + regions->nr_regions = 0; + regions->total_bytes = 0; + return regions; +} + +static ssize_t total_bytes_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_regions *regions = container_of(kobj, + struct damon_sysfs_scheme_regions, kobj); + + return sysfs_emit(buf, "%lu\n", regions->total_bytes); +} + +static void damon_sysfs_scheme_regions_rm_dirs( + struct damon_sysfs_scheme_regions *regions) +{ + struct damon_sysfs_scheme_region *r, *next; + + list_for_each_entry_safe(r, next, ®ions->regions_list, list) { + /* release function deletes it from the list */ + kobject_put(&r->kobj); + regions->nr_regions--; + } +} + +static void damon_sysfs_scheme_regions_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_scheme_regions, kobj)); +} + +static struct kobj_attribute damon_sysfs_scheme_regions_total_bytes_attr = + __ATTR_RO_MODE(total_bytes, 0400); + +static struct attribute *damon_sysfs_scheme_regions_attrs[] = { + &damon_sysfs_scheme_regions_total_bytes_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme_regions); + +static const struct kobj_type damon_sysfs_scheme_regions_ktype = { + .release = damon_sysfs_scheme_regions_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_regions_groups, +}; + +/* + * schemes/stats directory + */ + +struct damon_sysfs_stats { + struct kobject kobj; + unsigned long nr_tried; + unsigned long sz_tried; + unsigned long nr_applied; + unsigned long sz_applied; + unsigned long sz_ops_filter_passed; + unsigned long qt_exceeds; +}; + +static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL); +} + +static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->nr_tried); +} + +static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->sz_tried); +} + +static ssize_t nr_applied_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->nr_applied); +} + +static ssize_t sz_applied_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->sz_applied); +} + +static ssize_t sz_ops_filter_passed_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->sz_ops_filter_passed); +} + +static ssize_t qt_exceeds_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_stats *stats = container_of(kobj, + struct damon_sysfs_stats, kobj); + + return sysfs_emit(buf, "%lu\n", stats->qt_exceeds); +} + +static void damon_sysfs_stats_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_stats, kobj)); +} + +static struct kobj_attribute damon_sysfs_stats_nr_tried_attr = + __ATTR_RO_MODE(nr_tried, 0400); + +static struct kobj_attribute damon_sysfs_stats_sz_tried_attr = + __ATTR_RO_MODE(sz_tried, 0400); + +static struct kobj_attribute damon_sysfs_stats_nr_applied_attr = + __ATTR_RO_MODE(nr_applied, 0400); + +static struct kobj_attribute damon_sysfs_stats_sz_applied_attr = + __ATTR_RO_MODE(sz_applied, 0400); + +static struct kobj_attribute damon_sysfs_stats_sz_ops_filter_passed_attr = + __ATTR_RO_MODE(sz_ops_filter_passed, 0400); + +static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr = + __ATTR_RO_MODE(qt_exceeds, 0400); + +static struct attribute *damon_sysfs_stats_attrs[] = { + &damon_sysfs_stats_nr_tried_attr.attr, + &damon_sysfs_stats_sz_tried_attr.attr, + &damon_sysfs_stats_nr_applied_attr.attr, + &damon_sysfs_stats_sz_applied_attr.attr, + &damon_sysfs_stats_sz_ops_filter_passed_attr.attr, + &damon_sysfs_stats_qt_exceeds_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_stats); + +static const struct kobj_type damon_sysfs_stats_ktype = { + .release = damon_sysfs_stats_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_stats_groups, +}; + +/* + * filter directory + */ + +/* + * enum damos_sysfs_filter_handle_layer - Layers handling filters of a dir. + */ +enum damos_sysfs_filter_handle_layer { + DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH, +}; + +struct damon_sysfs_scheme_filter { + struct kobject kobj; + enum damos_sysfs_filter_handle_layer handle_layer; + enum damos_filter_type type; + bool matching; + bool allow; + char *memcg_path; + struct damon_addr_range addr_range; + struct damon_size_range sz_range; + int target_idx; +}; + +static struct damon_sysfs_scheme_filter *damon_sysfs_scheme_filter_alloc( + enum damos_sysfs_filter_handle_layer layer) +{ + struct damon_sysfs_scheme_filter *filter; + + filter = kzalloc(sizeof(struct damon_sysfs_scheme_filter), GFP_KERNEL); + if (filter) + filter->handle_layer = layer; + return filter; +} + +struct damos_sysfs_filter_type_name { + enum damos_filter_type type; + char *name; +}; + +static const struct damos_sysfs_filter_type_name +damos_sysfs_filter_type_names[] = { + { + .type = DAMOS_FILTER_TYPE_ANON, + .name = "anon", + }, + { + .type = DAMOS_FILTER_TYPE_ACTIVE, + .name = "active", + }, + { + .type = DAMOS_FILTER_TYPE_MEMCG, + .name = "memcg", + }, + { + .type = DAMOS_FILTER_TYPE_YOUNG, + .name = "young", + }, + { + .type = DAMOS_FILTER_TYPE_HUGEPAGE_SIZE, + .name = "hugepage_size", + }, + { + .type = DAMOS_FILTER_TYPE_UNMAPPED, + .name = "unmapped", + }, + { + .type = DAMOS_FILTER_TYPE_ADDR, + .name = "addr", + }, + { + .type = DAMOS_FILTER_TYPE_TARGET, + .name = "target", + }, +}; + +static ssize_t type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damos_sysfs_filter_type_names); i++) { + const struct damos_sysfs_filter_type_name *type_name; + + type_name = &damos_sysfs_filter_type_names[i]; + if (type_name->type == filter->type) + return sysfs_emit(buf, "%s\n", type_name->name); + } + return -EINVAL; +} + +static bool damos_sysfs_scheme_filter_valid_type( + enum damos_sysfs_filter_handle_layer layer, + enum damos_filter_type type) +{ + switch (layer) { + case DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH: + return true; + case DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE: + return !damos_filter_for_ops(type); + case DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS: + return damos_filter_for_ops(type); + default: + break; + } + return false; +} + +static ssize_t type_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + ssize_t ret = -EINVAL; + int i; + + for (i = 0; i < ARRAY_SIZE(damos_sysfs_filter_type_names); i++) { + const struct damos_sysfs_filter_type_name *type_name; + + type_name = &damos_sysfs_filter_type_names[i]; + if (sysfs_streq(buf, type_name->name)) { + if (!damos_sysfs_scheme_filter_valid_type( + filter->handle_layer, + type_name->type)) + break; + filter->type = type_name->type; + ret = count; + break; + } + } + return ret; +} + +static ssize_t matching_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%c\n", filter->matching ? 'Y' : 'N'); +} + +static ssize_t matching_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + bool matching; + int err = kstrtobool(buf, &matching); + + if (err) + return err; + + filter->matching = matching; + return count; +} + +static ssize_t allow_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%c\n", filter->allow ? 'Y' : 'N'); +} + +static ssize_t allow_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + bool allow; + int err = kstrtobool(buf, &allow); + + if (err) + return err; + + filter->allow = allow; + return count; +} + +static ssize_t memcg_path_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%s\n", + filter->memcg_path ? filter->memcg_path : ""); +} + +static ssize_t memcg_path_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + char *path = kmalloc_array(size_add(count, 1), sizeof(*path), + GFP_KERNEL); + + if (!path) + return -ENOMEM; + + strscpy(path, buf, count + 1); + kfree(filter->memcg_path); + filter->memcg_path = path; + return count; +} + +static ssize_t addr_start_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%lu\n", filter->addr_range.start); +} + +static ssize_t addr_start_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + int err = kstrtoul(buf, 0, &filter->addr_range.start); + + return err ? err : count; +} + +static ssize_t addr_end_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%lu\n", filter->addr_range.end); +} + +static ssize_t addr_end_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + int err = kstrtoul(buf, 0, &filter->addr_range.end); + + return err ? err : count; +} + +static ssize_t min_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%lu\n", filter->sz_range.min); +} + +static ssize_t min_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + int err = kstrtoul(buf, 0, &filter->sz_range.min); + + return err ? err : count; +} + +static ssize_t max_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%lu\n", filter->sz_range.max); +} + +static ssize_t max_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + int err = kstrtoul(buf, 0, &filter->sz_range.max); + + return err ? err : count; +} + +static ssize_t damon_target_idx_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + return sysfs_emit(buf, "%d\n", filter->target_idx); +} + +static ssize_t damon_target_idx_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + int err = kstrtoint(buf, 0, &filter->target_idx); + + return err ? err : count; +} + +static void damon_sysfs_scheme_filter_release(struct kobject *kobj) +{ + struct damon_sysfs_scheme_filter *filter = container_of(kobj, + struct damon_sysfs_scheme_filter, kobj); + + kfree(filter->memcg_path); + kfree(filter); +} + +static struct kobj_attribute damon_sysfs_scheme_filter_type_attr = + __ATTR_RW_MODE(type, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_matching_attr = + __ATTR_RW_MODE(matching, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_allow_attr = + __ATTR_RW_MODE(allow, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_memcg_path_attr = + __ATTR_RW_MODE(memcg_path, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_addr_start_attr = + __ATTR_RW_MODE(addr_start, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_addr_end_attr = + __ATTR_RW_MODE(addr_end, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_min_attr = + __ATTR_RW_MODE(min, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_max_attr = + __ATTR_RW_MODE(max, 0600); + +static struct kobj_attribute damon_sysfs_scheme_filter_damon_target_idx_attr = + __ATTR_RW_MODE(damon_target_idx, 0600); + +static struct attribute *damon_sysfs_scheme_filter_attrs[] = { + &damon_sysfs_scheme_filter_type_attr.attr, + &damon_sysfs_scheme_filter_matching_attr.attr, + &damon_sysfs_scheme_filter_allow_attr.attr, + &damon_sysfs_scheme_filter_memcg_path_attr.attr, + &damon_sysfs_scheme_filter_addr_start_attr.attr, + &damon_sysfs_scheme_filter_addr_end_attr.attr, + &damon_sysfs_scheme_filter_min_attr.attr, + &damon_sysfs_scheme_filter_max_attr.attr, + &damon_sysfs_scheme_filter_damon_target_idx_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme_filter); + +static const struct kobj_type damon_sysfs_scheme_filter_ktype = { + .release = damon_sysfs_scheme_filter_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_filter_groups, +}; + +/* + * filters directory + */ + +struct damon_sysfs_scheme_filters { + struct kobject kobj; + enum damos_sysfs_filter_handle_layer handle_layer; + struct damon_sysfs_scheme_filter **filters_arr; + int nr; +}; + +static struct damon_sysfs_scheme_filters * +damon_sysfs_scheme_filters_alloc(enum damos_sysfs_filter_handle_layer layer) +{ + struct damon_sysfs_scheme_filters *filters; + + filters = kzalloc(sizeof(struct damon_sysfs_scheme_filters), GFP_KERNEL); + if (filters) + filters->handle_layer = layer; + return filters; +} + +static void damon_sysfs_scheme_filters_rm_dirs( + struct damon_sysfs_scheme_filters *filters) +{ + struct damon_sysfs_scheme_filter **filters_arr = filters->filters_arr; + int i; + + for (i = 0; i < filters->nr; i++) + kobject_put(&filters_arr[i]->kobj); + filters->nr = 0; + kfree(filters_arr); + filters->filters_arr = NULL; +} + +static int damon_sysfs_scheme_filters_add_dirs( + struct damon_sysfs_scheme_filters *filters, int nr_filters) +{ + struct damon_sysfs_scheme_filter **filters_arr, *filter; + int err, i; + + damon_sysfs_scheme_filters_rm_dirs(filters); + if (!nr_filters) + return 0; + + filters_arr = kmalloc_array(nr_filters, sizeof(*filters_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!filters_arr) + return -ENOMEM; + filters->filters_arr = filters_arr; + + for (i = 0; i < nr_filters; i++) { + filter = damon_sysfs_scheme_filter_alloc( + filters->handle_layer); + if (!filter) { + damon_sysfs_scheme_filters_rm_dirs(filters); + return -ENOMEM; + } + + err = kobject_init_and_add(&filter->kobj, + &damon_sysfs_scheme_filter_ktype, + &filters->kobj, "%d", i); + if (err) { + kobject_put(&filter->kobj); + damon_sysfs_scheme_filters_rm_dirs(filters); + return err; + } + + filters_arr[i] = filter; + filters->nr++; + } + return 0; +} + +static ssize_t nr_filters_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme_filters *filters = container_of(kobj, + struct damon_sysfs_scheme_filters, kobj); + + return sysfs_emit(buf, "%d\n", filters->nr); +} + +static ssize_t nr_filters_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme_filters *filters; + int nr, err = kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + filters = container_of(kobj, struct damon_sysfs_scheme_filters, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_scheme_filters_add_dirs(filters, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + + return count; +} + +static void damon_sysfs_scheme_filters_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_scheme_filters, kobj)); +} + +static struct kobj_attribute damon_sysfs_scheme_filters_nr_attr = + __ATTR_RW_MODE(nr_filters, 0600); + +static struct attribute *damon_sysfs_scheme_filters_attrs[] = { + &damon_sysfs_scheme_filters_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme_filters); + +static const struct kobj_type damon_sysfs_scheme_filters_ktype = { + .release = damon_sysfs_scheme_filters_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_filters_groups, +}; + +/* + * watermarks directory + */ + +struct damon_sysfs_watermarks { + struct kobject kobj; + enum damos_wmark_metric metric; + unsigned long interval_us; + unsigned long high; + unsigned long mid; + unsigned long low; +}; + +static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc( + enum damos_wmark_metric metric, unsigned long interval_us, + unsigned long high, unsigned long mid, unsigned long low) +{ + struct damon_sysfs_watermarks *watermarks = kmalloc( + sizeof(*watermarks), GFP_KERNEL); + + if (!watermarks) + return NULL; + watermarks->kobj = (struct kobject){}; + watermarks->metric = metric; + watermarks->interval_us = interval_us; + watermarks->high = high; + watermarks->mid = mid; + watermarks->low = low; + return watermarks; +} + +struct damos_sysfs_wmark_metric_name { + enum damos_wmark_metric metric; + char *name; +}; + +static const struct damos_sysfs_wmark_metric_name +damos_sysfs_wmark_metric_names[] = { + { + .metric = DAMOS_WMARK_NONE, + .name = "none", + }, + { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .name = "free_mem_rate", + }, +}; + +static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damos_sysfs_wmark_metric_names); i++) { + const struct damos_sysfs_wmark_metric_name *metric_name; + + metric_name = &damos_sysfs_wmark_metric_names[i]; + if (metric_name->metric == watermarks->metric) + return sysfs_emit(buf, "%s\n", metric_name->name); + } + return -EINVAL; +} + +static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damos_sysfs_wmark_metric_names); i++) { + const struct damos_sysfs_wmark_metric_name *metric_name; + + metric_name = &damos_sysfs_wmark_metric_names[i]; + if (sysfs_streq(buf, metric_name->name)) { + watermarks->metric = metric_name->metric; + return count; + } + } + return -EINVAL; +} + +static ssize_t interval_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->interval_us); +} + +static ssize_t interval_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->interval_us); + + return err ? err : count; +} + +static ssize_t high_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->high); +} + +static ssize_t high_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->high); + + return err ? err : count; +} + +static ssize_t mid_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->mid); +} + +static ssize_t mid_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->mid); + + return err ? err : count; +} + +static ssize_t low_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + + return sysfs_emit(buf, "%lu\n", watermarks->low); +} + +static ssize_t low_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_watermarks *watermarks = container_of(kobj, + struct damon_sysfs_watermarks, kobj); + int err = kstrtoul(buf, 0, &watermarks->low); + + return err ? err : count; +} + +static void damon_sysfs_watermarks_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj)); +} + +static struct kobj_attribute damon_sysfs_watermarks_metric_attr = + __ATTR_RW_MODE(metric, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr = + __ATTR_RW_MODE(interval_us, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_high_attr = + __ATTR_RW_MODE(high, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_mid_attr = + __ATTR_RW_MODE(mid, 0600); + +static struct kobj_attribute damon_sysfs_watermarks_low_attr = + __ATTR_RW_MODE(low, 0600); + +static struct attribute *damon_sysfs_watermarks_attrs[] = { + &damon_sysfs_watermarks_metric_attr.attr, + &damon_sysfs_watermarks_interval_us_attr.attr, + &damon_sysfs_watermarks_high_attr.attr, + &damon_sysfs_watermarks_mid_attr.attr, + &damon_sysfs_watermarks_low_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_watermarks); + +static const struct kobj_type damon_sysfs_watermarks_ktype = { + .release = damon_sysfs_watermarks_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_watermarks_groups, +}; + +/* + * quota goal directory + */ + +struct damos_sysfs_quota_goal { + struct kobject kobj; + enum damos_quota_goal_metric metric; + unsigned long target_value; + unsigned long current_value; + int nid; + char *path; +}; + +static struct damos_sysfs_quota_goal *damos_sysfs_quota_goal_alloc(void) +{ + return kzalloc(sizeof(struct damos_sysfs_quota_goal), GFP_KERNEL); +} + +struct damos_sysfs_qgoal_metric_name { + enum damos_quota_goal_metric metric; + char *name; +}; + +static +struct damos_sysfs_qgoal_metric_name damos_sysfs_qgoal_metric_names[] = { + { + .metric = DAMOS_QUOTA_USER_INPUT, + .name = "user_input", + }, + { + .metric = DAMOS_QUOTA_SOME_MEM_PSI_US, + .name = "some_mem_psi_us", + }, + { + .metric = DAMOS_QUOTA_NODE_MEM_USED_BP, + .name = "node_mem_used_bp", + }, + { + .metric = DAMOS_QUOTA_NODE_MEM_FREE_BP, + .name = "node_mem_free_bp", + }, + { + .metric = DAMOS_QUOTA_NODE_MEMCG_USED_BP, + .name = "node_memcg_used_bp", + }, + { + .metric = DAMOS_QUOTA_NODE_MEMCG_FREE_BP, + .name = "node_memcg_free_bp", + }, +}; + +static ssize_t target_metric_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, + struct damos_sysfs_quota_goal, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damos_sysfs_qgoal_metric_names); i++) { + struct damos_sysfs_qgoal_metric_name *metric_name; + + metric_name = &damos_sysfs_qgoal_metric_names[i]; + if (metric_name->metric == goal->metric) + return sysfs_emit(buf, "%s\n", metric_name->name); + } + return -EINVAL; +} + +static ssize_t target_metric_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, + struct damos_sysfs_quota_goal, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damos_sysfs_qgoal_metric_names); i++) { + struct damos_sysfs_qgoal_metric_name *metric_name; + + metric_name = &damos_sysfs_qgoal_metric_names[i]; + if (sysfs_streq(buf, metric_name->name)) { + goal->metric = metric_name->metric; + return count; + } + } + return -EINVAL; +} + +static ssize_t target_value_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, struct + damos_sysfs_quota_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->target_value); +} + +static ssize_t target_value_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, struct + damos_sysfs_quota_goal, kobj); + int err = kstrtoul(buf, 0, &goal->target_value); + + return err ? err : count; +} + +static ssize_t current_value_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, struct + damos_sysfs_quota_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->current_value); +} + +static ssize_t current_value_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, struct + damos_sysfs_quota_goal, kobj); + int err = kstrtoul(buf, 0, &goal->current_value); + + /* feed callback should check existence of this file and read value */ + return err ? err : count; +} + +static ssize_t nid_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, struct + damos_sysfs_quota_goal, kobj); + + + return sysfs_emit(buf, "%d\n", goal->nid); +} + +static ssize_t nid_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, struct + damos_sysfs_quota_goal, kobj); + int err = kstrtoint(buf, 0, &goal->nid); + + /* feed callback should check existence of this file and read value */ + return err ? err : count; +} + +static ssize_t path_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, + struct damos_sysfs_quota_goal, kobj); + + return sysfs_emit(buf, "%s\n", goal->path ? goal->path : ""); +} + +static ssize_t path_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, + struct damos_sysfs_quota_goal, kobj); + char *path = kmalloc_array(size_add(count, 1), sizeof(*path), + GFP_KERNEL); + + if (!path) + return -ENOMEM; + + strscpy(path, buf, count + 1); + kfree(goal->path); + goal->path = path; + return count; +} + +static void damos_sysfs_quota_goal_release(struct kobject *kobj) +{ + struct damos_sysfs_quota_goal *goal = container_of(kobj, + struct damos_sysfs_quota_goal, kobj); + + kfree(goal->path); + kfree(goal); +} + +static struct kobj_attribute damos_sysfs_quota_goal_target_metric_attr = + __ATTR_RW_MODE(target_metric, 0600); + +static struct kobj_attribute damos_sysfs_quota_goal_target_value_attr = + __ATTR_RW_MODE(target_value, 0600); + +static struct kobj_attribute damos_sysfs_quota_goal_current_value_attr = + __ATTR_RW_MODE(current_value, 0600); + +static struct kobj_attribute damos_sysfs_quota_goal_nid_attr = + __ATTR_RW_MODE(nid, 0600); + +static struct kobj_attribute damos_sysfs_quota_goal_path_attr = + __ATTR_RW_MODE(path, 0600); + +static struct attribute *damos_sysfs_quota_goal_attrs[] = { + &damos_sysfs_quota_goal_target_metric_attr.attr, + &damos_sysfs_quota_goal_target_value_attr.attr, + &damos_sysfs_quota_goal_current_value_attr.attr, + &damos_sysfs_quota_goal_nid_attr.attr, + &damos_sysfs_quota_goal_path_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damos_sysfs_quota_goal); + +static const struct kobj_type damos_sysfs_quota_goal_ktype = { + .release = damos_sysfs_quota_goal_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damos_sysfs_quota_goal_groups, +}; + +/* + * quota goals directory + */ + +struct damos_sysfs_quota_goals { + struct kobject kobj; + struct damos_sysfs_quota_goal **goals_arr; /* counted by nr */ + int nr; +}; + +static struct damos_sysfs_quota_goals *damos_sysfs_quota_goals_alloc(void) +{ + return kzalloc(sizeof(struct damos_sysfs_quota_goals), GFP_KERNEL); +} + +static void damos_sysfs_quota_goals_rm_dirs( + struct damos_sysfs_quota_goals *goals) +{ + struct damos_sysfs_quota_goal **goals_arr = goals->goals_arr; + int i; + + for (i = 0; i < goals->nr; i++) + kobject_put(&goals_arr[i]->kobj); + goals->nr = 0; + kfree(goals_arr); + goals->goals_arr = NULL; +} + +static int damos_sysfs_quota_goals_add_dirs( + struct damos_sysfs_quota_goals *goals, int nr_goals) +{ + struct damos_sysfs_quota_goal **goals_arr, *goal; + int err, i; + + damos_sysfs_quota_goals_rm_dirs(goals); + if (!nr_goals) + return 0; + + goals_arr = kmalloc_array(nr_goals, sizeof(*goals_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!goals_arr) + return -ENOMEM; + goals->goals_arr = goals_arr; + + for (i = 0; i < nr_goals; i++) { + goal = damos_sysfs_quota_goal_alloc(); + if (!goal) { + damos_sysfs_quota_goals_rm_dirs(goals); + return -ENOMEM; + } + + err = kobject_init_and_add(&goal->kobj, + &damos_sysfs_quota_goal_ktype, &goals->kobj, + "%d", i); + if (err) { + kobject_put(&goal->kobj); + damos_sysfs_quota_goals_rm_dirs(goals); + return err; + } + + goals_arr[i] = goal; + goals->nr++; + } + return 0; +} + +static ssize_t nr_goals_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_quota_goals *goals = container_of(kobj, + struct damos_sysfs_quota_goals, kobj); + + return sysfs_emit(buf, "%d\n", goals->nr); +} + +static ssize_t nr_goals_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_quota_goals *goals; + int nr, err = kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + goals = container_of(kobj, struct damos_sysfs_quota_goals, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damos_sysfs_quota_goals_add_dirs(goals, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + + return count; +} + +static void damos_sysfs_quota_goals_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damos_sysfs_quota_goals, kobj)); +} + +static struct kobj_attribute damos_sysfs_quota_goals_nr_attr = + __ATTR_RW_MODE(nr_goals, 0600); + +static struct attribute *damos_sysfs_quota_goals_attrs[] = { + &damos_sysfs_quota_goals_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damos_sysfs_quota_goals); + +static const struct kobj_type damos_sysfs_quota_goals_ktype = { + .release = damos_sysfs_quota_goals_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damos_sysfs_quota_goals_groups, +}; + +/* + * scheme/weights directory + */ + +struct damon_sysfs_weights { + struct kobject kobj; + unsigned int sz; + unsigned int nr_accesses; + unsigned int age; +}; + +static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz, + unsigned int nr_accesses, unsigned int age) +{ + struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights), + GFP_KERNEL); + + if (!weights) + return NULL; + weights->kobj = (struct kobject){}; + weights->sz = sz; + weights->nr_accesses = nr_accesses; + weights->age = age; + return weights; +} + +static ssize_t sz_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->sz); +} + +static ssize_t sz_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->sz); + + return err ? err : count; +} + +static ssize_t nr_accesses_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->nr_accesses); +} + +static ssize_t nr_accesses_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->nr_accesses); + + return err ? err : count; +} + +static ssize_t age_permil_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + + return sysfs_emit(buf, "%u\n", weights->age); +} + +static ssize_t age_permil_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_weights *weights = container_of(kobj, + struct damon_sysfs_weights, kobj); + int err = kstrtouint(buf, 0, &weights->age); + + return err ? err : count; +} + +static void damon_sysfs_weights_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_weights, kobj)); +} + +static struct kobj_attribute damon_sysfs_weights_sz_attr = + __ATTR_RW_MODE(sz_permil, 0600); + +static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr = + __ATTR_RW_MODE(nr_accesses_permil, 0600); + +static struct kobj_attribute damon_sysfs_weights_age_attr = + __ATTR_RW_MODE(age_permil, 0600); + +static struct attribute *damon_sysfs_weights_attrs[] = { + &damon_sysfs_weights_sz_attr.attr, + &damon_sysfs_weights_nr_accesses_attr.attr, + &damon_sysfs_weights_age_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_weights); + +static const struct kobj_type damon_sysfs_weights_ktype = { + .release = damon_sysfs_weights_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_weights_groups, +}; + +/* + * quotas directory + */ + +struct damon_sysfs_quotas { + struct kobject kobj; + struct damon_sysfs_weights *weights; + struct damos_sysfs_quota_goals *goals; + unsigned long ms; + unsigned long sz; + unsigned long reset_interval_ms; + unsigned long effective_sz; /* Effective size quota in bytes */ +}; + +static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL); +} + +static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas) +{ + struct damon_sysfs_weights *weights; + struct damos_sysfs_quota_goals *goals; + int err; + + weights = damon_sysfs_weights_alloc(0, 0, 0); + if (!weights) + return -ENOMEM; + + err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype, + "as->kobj, "weights"); + if (err) { + kobject_put(&weights->kobj); + return err; + } + quotas->weights = weights; + + goals = damos_sysfs_quota_goals_alloc(); + if (!goals) { + kobject_put(&weights->kobj); + return -ENOMEM; + } + err = kobject_init_and_add(&goals->kobj, + &damos_sysfs_quota_goals_ktype, "as->kobj, + "goals"); + if (err) { + kobject_put(&weights->kobj); + kobject_put(&goals->kobj); + } else { + quotas->goals = goals; + } + + return err; +} + +static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas) +{ + kobject_put("as->weights->kobj); + damos_sysfs_quota_goals_rm_dirs(quotas->goals); + kobject_put("as->goals->kobj); +} + +static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->ms); +} + +static ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, "as->ms); + + if (err) + return -EINVAL; + return count; +} + +static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->sz); +} + +static ssize_t bytes_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, "as->sz); + + if (err) + return -EINVAL; + return count; +} + +static ssize_t reset_interval_ms_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms); +} + +static ssize_t reset_interval_ms_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + int err = kstrtoul(buf, 0, "as->reset_interval_ms); + + if (err) + return -EINVAL; + return count; +} + +static ssize_t effective_bytes_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_quotas *quotas = container_of(kobj, + struct damon_sysfs_quotas, kobj); + + return sysfs_emit(buf, "%lu\n", quotas->effective_sz); +} + +static void damon_sysfs_quotas_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_quotas, kobj)); +} + +static struct kobj_attribute damon_sysfs_quotas_ms_attr = + __ATTR_RW_MODE(ms, 0600); + +static struct kobj_attribute damon_sysfs_quotas_sz_attr = + __ATTR_RW_MODE(bytes, 0600); + +static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr = + __ATTR_RW_MODE(reset_interval_ms, 0600); + +static struct kobj_attribute damon_sysfs_quotas_effective_bytes_attr = + __ATTR_RO_MODE(effective_bytes, 0400); + +static struct attribute *damon_sysfs_quotas_attrs[] = { + &damon_sysfs_quotas_ms_attr.attr, + &damon_sysfs_quotas_sz_attr.attr, + &damon_sysfs_quotas_reset_interval_ms_attr.attr, + &damon_sysfs_quotas_effective_bytes_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_quotas); + +static const struct kobj_type damon_sysfs_quotas_ktype = { + .release = damon_sysfs_quotas_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_quotas_groups, +}; + +/* + * access_pattern directory + */ + +struct damon_sysfs_access_pattern { + struct kobject kobj; + struct damon_sysfs_ul_range *sz; + struct damon_sysfs_ul_range *nr_accesses; + struct damon_sysfs_ul_range *age; +}; + +static +struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void) +{ + struct damon_sysfs_access_pattern *access_pattern = + kmalloc(sizeof(*access_pattern), GFP_KERNEL); + + if (!access_pattern) + return NULL; + access_pattern->kobj = (struct kobject){}; + return access_pattern; +} + +static int damon_sysfs_access_pattern_add_range_dir( + struct damon_sysfs_access_pattern *access_pattern, + struct damon_sysfs_ul_range **range_dir_ptr, + char *name) +{ + struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0); + int err; + + if (!range) + return -ENOMEM; + err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype, + &access_pattern->kobj, "%s", name); + if (err) + kobject_put(&range->kobj); + else + *range_dir_ptr = range; + return err; +} + +static int damon_sysfs_access_pattern_add_dirs( + struct damon_sysfs_access_pattern *access_pattern) +{ + int err; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + &access_pattern->sz, "sz"); + if (err) + goto put_sz_out; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + &access_pattern->nr_accesses, "nr_accesses"); + if (err) + goto put_nr_accesses_sz_out; + + err = damon_sysfs_access_pattern_add_range_dir(access_pattern, + &access_pattern->age, "age"); + if (err) + goto put_age_nr_accesses_sz_out; + return 0; + +put_age_nr_accesses_sz_out: + kobject_put(&access_pattern->age->kobj); + access_pattern->age = NULL; +put_nr_accesses_sz_out: + kobject_put(&access_pattern->nr_accesses->kobj); + access_pattern->nr_accesses = NULL; +put_sz_out: + kobject_put(&access_pattern->sz->kobj); + access_pattern->sz = NULL; + return err; +} + +static void damon_sysfs_access_pattern_rm_dirs( + struct damon_sysfs_access_pattern *access_pattern) +{ + kobject_put(&access_pattern->sz->kobj); + kobject_put(&access_pattern->nr_accesses->kobj); + kobject_put(&access_pattern->age->kobj); +} + +static void damon_sysfs_access_pattern_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj)); +} + +static struct attribute *damon_sysfs_access_pattern_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_access_pattern); + +static const struct kobj_type damon_sysfs_access_pattern_ktype = { + .release = damon_sysfs_access_pattern_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_access_pattern_groups, +}; + +/* + * dest (action destination) directory + */ + +struct damos_sysfs_dest { + struct kobject kobj; + unsigned int id; + unsigned int weight; +}; + +static struct damos_sysfs_dest *damos_sysfs_dest_alloc(void) +{ + return kzalloc(sizeof(struct damos_sysfs_dest), GFP_KERNEL); +} + +static ssize_t id_show( + struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_dest *dest = container_of(kobj, + struct damos_sysfs_dest, kobj); + + return sysfs_emit(buf, "%u\n", dest->id); +} + +static ssize_t id_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_dest *dest = container_of(kobj, + struct damos_sysfs_dest, kobj); + int err = kstrtouint(buf, 0, &dest->id); + + return err ? err : count; +} + +static ssize_t weight_show( + struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_dest *dest = container_of(kobj, + struct damos_sysfs_dest, kobj); + + return sysfs_emit(buf, "%u\n", dest->weight); +} + +static ssize_t weight_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_dest *dest = container_of(kobj, + struct damos_sysfs_dest, kobj); + int err = kstrtouint(buf, 0, &dest->weight); + + return err ? err : count; +} + +static void damos_sysfs_dest_release(struct kobject *kobj) +{ + struct damos_sysfs_dest *dest = container_of(kobj, + struct damos_sysfs_dest, kobj); + kfree(dest); +} + +static struct kobj_attribute damos_sysfs_dest_id_attr = + __ATTR_RW_MODE(id, 0600); + +static struct kobj_attribute damos_sysfs_dest_weight_attr = + __ATTR_RW_MODE(weight, 0600); + +static struct attribute *damos_sysfs_dest_attrs[] = { + &damos_sysfs_dest_id_attr.attr, + &damos_sysfs_dest_weight_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damos_sysfs_dest); + +static const struct kobj_type damos_sysfs_dest_ktype = { + .release = damos_sysfs_dest_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damos_sysfs_dest_groups, +}; + +/* + * dests (action destinations) directory + */ + +struct damos_sysfs_dests { + struct kobject kobj; + struct damos_sysfs_dest **dests_arr; + int nr; +}; + +static struct damos_sysfs_dests * +damos_sysfs_dests_alloc(void) +{ + return kzalloc(sizeof(struct damos_sysfs_dests), GFP_KERNEL); +} + +static void damos_sysfs_dests_rm_dirs( + struct damos_sysfs_dests *dests) +{ + struct damos_sysfs_dest **dests_arr = dests->dests_arr; + int i; + + for (i = 0; i < dests->nr; i++) + kobject_put(&dests_arr[i]->kobj); + dests->nr = 0; + kfree(dests_arr); + dests->dests_arr = NULL; +} + +static int damos_sysfs_dests_add_dirs( + struct damos_sysfs_dests *dests, int nr_dests) +{ + struct damos_sysfs_dest **dests_arr, *dest; + int err, i; + + damos_sysfs_dests_rm_dirs(dests); + if (!nr_dests) + return 0; + + dests_arr = kmalloc_array(nr_dests, sizeof(*dests_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!dests_arr) + return -ENOMEM; + dests->dests_arr = dests_arr; + + for (i = 0; i < nr_dests; i++) { + dest = damos_sysfs_dest_alloc(); + if (!dest) { + damos_sysfs_dests_rm_dirs(dests); + return -ENOMEM; + } + + err = kobject_init_and_add(&dest->kobj, + &damos_sysfs_dest_ktype, + &dests->kobj, "%d", i); + if (err) { + kobject_put(&dest->kobj); + damos_sysfs_dests_rm_dirs(dests); + return err; + } + + dests_arr[i] = dest; + dests->nr++; + } + return 0; +} + +static ssize_t nr_dests_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damos_sysfs_dests *dests = container_of(kobj, + struct damos_sysfs_dests, kobj); + + return sysfs_emit(buf, "%d\n", dests->nr); +} + +static ssize_t nr_dests_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damos_sysfs_dests *dests; + int nr, err = kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + dests = container_of(kobj, struct damos_sysfs_dests, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damos_sysfs_dests_add_dirs(dests, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + + return count; +} + +static void damos_sysfs_dests_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damos_sysfs_dests, kobj)); +} + +static struct kobj_attribute damos_sysfs_dests_nr_attr = + __ATTR_RW_MODE(nr_dests, 0600); + +static struct attribute *damos_sysfs_dests_attrs[] = { + &damos_sysfs_dests_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damos_sysfs_dests); + +static const struct kobj_type damos_sysfs_dests_ktype = { + .release = damos_sysfs_dests_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damos_sysfs_dests_groups, +}; + +/* + * scheme directory + */ + +struct damon_sysfs_scheme { + struct kobject kobj; + enum damos_action action; + struct damon_sysfs_access_pattern *access_pattern; + unsigned long apply_interval_us; + struct damon_sysfs_quotas *quotas; + struct damon_sysfs_watermarks *watermarks; + struct damon_sysfs_scheme_filters *core_filters; + struct damon_sysfs_scheme_filters *ops_filters; + struct damon_sysfs_scheme_filters *filters; + struct damon_sysfs_stats *stats; + struct damon_sysfs_scheme_regions *tried_regions; + int target_nid; + struct damos_sysfs_dests *dests; +}; + +struct damos_sysfs_action_name { + enum damos_action action; + char *name; +}; + +static struct damos_sysfs_action_name damos_sysfs_action_names[] = { + { + .action = DAMOS_WILLNEED, + .name = "willneed", + }, + { + .action = DAMOS_COLD, + .name = "cold", + }, + { + .action = DAMOS_PAGEOUT, + .name = "pageout", + }, + { + .action = DAMOS_HUGEPAGE, + .name = "hugepage", + }, + { + .action = DAMOS_NOHUGEPAGE, + .name = "nohugepage", + }, + { + .action = DAMOS_LRU_PRIO, + .name = "lru_prio", + }, + { + .action = DAMOS_LRU_DEPRIO, + .name = "lru_deprio", + }, + { + .action = DAMOS_MIGRATE_HOT, + .name = "migrate_hot", + }, + { + .action = DAMOS_MIGRATE_COLD, + .name = "migrate_cold", + }, + { + .action = DAMOS_STAT, + .name = "stat", + }, +}; + +static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc( + enum damos_action action, unsigned long apply_interval_us) +{ + struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme), + GFP_KERNEL); + + if (!scheme) + return NULL; + scheme->kobj = (struct kobject){}; + scheme->action = action; + scheme->apply_interval_us = apply_interval_us; + scheme->target_nid = NUMA_NO_NODE; + return scheme; +} + +static int damon_sysfs_scheme_set_access_pattern( + struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_access_pattern *access_pattern; + int err; + + access_pattern = damon_sysfs_access_pattern_alloc(); + if (!access_pattern) + return -ENOMEM; + err = kobject_init_and_add(&access_pattern->kobj, + &damon_sysfs_access_pattern_ktype, &scheme->kobj, + "access_pattern"); + if (err) + goto out; + err = damon_sysfs_access_pattern_add_dirs(access_pattern); + if (err) + goto out; + scheme->access_pattern = access_pattern; + return 0; + +out: + kobject_put(&access_pattern->kobj); + return err; +} + +static int damos_sysfs_set_dests(struct damon_sysfs_scheme *scheme) +{ + struct damos_sysfs_dests *dests = damos_sysfs_dests_alloc(); + int err; + + if (!dests) + return -ENOMEM; + err = kobject_init_and_add(&dests->kobj, &damos_sysfs_dests_ktype, + &scheme->kobj, "dests"); + if (err) + kobject_put(&dests->kobj); + else + scheme->dests = dests; + return err; +} + +static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc(); + int err; + + if (!quotas) + return -ENOMEM; + err = kobject_init_and_add("as->kobj, &damon_sysfs_quotas_ktype, + &scheme->kobj, "quotas"); + if (err) + goto out; + err = damon_sysfs_quotas_add_dirs(quotas); + if (err) + goto out; + scheme->quotas = quotas; + return 0; + +out: + kobject_put("as->kobj); + return err; +} + +static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_watermarks *watermarks = + damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0); + int err; + + if (!watermarks) + return -ENOMEM; + err = kobject_init_and_add(&watermarks->kobj, + &damon_sysfs_watermarks_ktype, &scheme->kobj, + "watermarks"); + if (err) + kobject_put(&watermarks->kobj); + else + scheme->watermarks = watermarks; + return err; +} + +static int damon_sysfs_scheme_set_filters(struct damon_sysfs_scheme *scheme, + enum damos_sysfs_filter_handle_layer layer, const char *name, + struct damon_sysfs_scheme_filters **filters_ptr) +{ + struct damon_sysfs_scheme_filters *filters = + damon_sysfs_scheme_filters_alloc(layer); + int err; + + if (!filters) + return -ENOMEM; + err = kobject_init_and_add(&filters->kobj, + &damon_sysfs_scheme_filters_ktype, &scheme->kobj, + "%s", name); + if (err) + kobject_put(&filters->kobj); + else + *filters_ptr = filters; + return err; +} + +static int damos_sysfs_set_filter_dirs(struct damon_sysfs_scheme *scheme) +{ + int err; + + err = damon_sysfs_scheme_set_filters(scheme, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH, "filters", + &scheme->filters); + if (err) + return err; + err = damon_sysfs_scheme_set_filters(scheme, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE, "core_filters", + &scheme->core_filters); + if (err) + goto put_filters_out; + err = damon_sysfs_scheme_set_filters(scheme, + DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS, "ops_filters", + &scheme->ops_filters); + if (err) + goto put_core_filters_out; + return 0; + +put_core_filters_out: + kobject_put(&scheme->core_filters->kobj); + scheme->core_filters = NULL; +put_filters_out: + kobject_put(&scheme->filters->kobj); + scheme->filters = NULL; + return err; +} + +static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc(); + int err; + + if (!stats) + return -ENOMEM; + err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype, + &scheme->kobj, "stats"); + if (err) + kobject_put(&stats->kobj); + else + scheme->stats = stats; + return err; +} + +static int damon_sysfs_scheme_set_tried_regions( + struct damon_sysfs_scheme *scheme) +{ + struct damon_sysfs_scheme_regions *tried_regions = + damon_sysfs_scheme_regions_alloc(); + int err; + + if (!tried_regions) + return -ENOMEM; + err = kobject_init_and_add(&tried_regions->kobj, + &damon_sysfs_scheme_regions_ktype, &scheme->kobj, + "tried_regions"); + if (err) + kobject_put(&tried_regions->kobj); + else + scheme->tried_regions = tried_regions; + return err; +} + +static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) +{ + int err; + + err = damon_sysfs_scheme_set_access_pattern(scheme); + if (err) + return err; + err = damos_sysfs_set_dests(scheme); + if (err) + goto put_access_pattern_out; + err = damon_sysfs_scheme_set_quotas(scheme); + if (err) + goto put_dests_out; + err = damon_sysfs_scheme_set_watermarks(scheme); + if (err) + goto put_quotas_access_pattern_out; + err = damos_sysfs_set_filter_dirs(scheme); + if (err) + goto put_watermarks_quotas_access_pattern_out; + err = damon_sysfs_scheme_set_stats(scheme); + if (err) + goto put_filters_watermarks_quotas_access_pattern_out; + err = damon_sysfs_scheme_set_tried_regions(scheme); + if (err) + goto put_tried_regions_out; + return 0; + +put_tried_regions_out: + kobject_put(&scheme->tried_regions->kobj); + scheme->tried_regions = NULL; +put_filters_watermarks_quotas_access_pattern_out: + kobject_put(&scheme->ops_filters->kobj); + scheme->ops_filters = NULL; + kobject_put(&scheme->core_filters->kobj); + scheme->core_filters = NULL; + kobject_put(&scheme->filters->kobj); + scheme->filters = NULL; +put_watermarks_quotas_access_pattern_out: + kobject_put(&scheme->watermarks->kobj); + scheme->watermarks = NULL; +put_quotas_access_pattern_out: + kobject_put(&scheme->quotas->kobj); + scheme->quotas = NULL; +put_dests_out: + kobject_put(&scheme->dests->kobj); + scheme->dests = NULL; +put_access_pattern_out: + kobject_put(&scheme->access_pattern->kobj); + scheme->access_pattern = NULL; + return err; +} + +static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) +{ + damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); + kobject_put(&scheme->access_pattern->kobj); + damos_sysfs_dests_rm_dirs(scheme->dests); + kobject_put(&scheme->dests->kobj); + damon_sysfs_quotas_rm_dirs(scheme->quotas); + kobject_put(&scheme->quotas->kobj); + kobject_put(&scheme->watermarks->kobj); + damon_sysfs_scheme_filters_rm_dirs(scheme->filters); + kobject_put(&scheme->filters->kobj); + damon_sysfs_scheme_filters_rm_dirs(scheme->core_filters); + kobject_put(&scheme->core_filters->kobj); + damon_sysfs_scheme_filters_rm_dirs(scheme->ops_filters); + kobject_put(&scheme->ops_filters->kobj); + kobject_put(&scheme->stats->kobj); + damon_sysfs_scheme_regions_rm_dirs(scheme->tried_regions); + kobject_put(&scheme->tried_regions->kobj); +} + +static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damos_sysfs_action_names); i++) { + struct damos_sysfs_action_name *action_name; + + action_name = &damos_sysfs_action_names[i]; + if (action_name->action == scheme->action) + return sysfs_emit(buf, "%s\n", action_name->name); + } + return -EINVAL; +} + +static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damos_sysfs_action_names); i++) { + struct damos_sysfs_action_name *action_name; + + action_name = &damos_sysfs_action_names[i]; + if (sysfs_streq(buf, action_name->name)) { + scheme->action = action_name->action; + return count; + } + } + return -EINVAL; +} + +static ssize_t apply_interval_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + + return sysfs_emit(buf, "%lu\n", scheme->apply_interval_us); +} + +static ssize_t apply_interval_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + int err = kstrtoul(buf, 0, &scheme->apply_interval_us); + + return err ? err : count; +} + +static ssize_t target_nid_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + + return sysfs_emit(buf, "%d\n", scheme->target_nid); +} + +static ssize_t target_nid_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_scheme *scheme = container_of(kobj, + struct damon_sysfs_scheme, kobj); + int err = 0; + + /* TODO: error handling for target_nid range. */ + err = kstrtoint(buf, 0, &scheme->target_nid); + + return err ? err : count; +} + +static void damon_sysfs_scheme_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_scheme, kobj)); +} + +static struct kobj_attribute damon_sysfs_scheme_action_attr = + __ATTR_RW_MODE(action, 0600); + +static struct kobj_attribute damon_sysfs_scheme_apply_interval_us_attr = + __ATTR_RW_MODE(apply_interval_us, 0600); + +static struct kobj_attribute damon_sysfs_scheme_target_nid_attr = + __ATTR_RW_MODE(target_nid, 0600); + +static struct attribute *damon_sysfs_scheme_attrs[] = { + &damon_sysfs_scheme_action_attr.attr, + &damon_sysfs_scheme_apply_interval_us_attr.attr, + &damon_sysfs_scheme_target_nid_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_scheme); + +static const struct kobj_type damon_sysfs_scheme_ktype = { + .release = damon_sysfs_scheme_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_scheme_groups, +}; + +/* + * schemes directory + */ + +struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL); +} + +void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes) +{ + struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr; + int i; + + for (i = 0; i < schemes->nr; i++) { + damon_sysfs_scheme_rm_dirs(schemes_arr[i]); + kobject_put(&schemes_arr[i]->kobj); + } + schemes->nr = 0; + kfree(schemes_arr); + schemes->schemes_arr = NULL; +} + +static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes, + int nr_schemes) +{ + struct damon_sysfs_scheme **schemes_arr, *scheme; + int err, i; + + damon_sysfs_schemes_rm_dirs(schemes); + if (!nr_schemes) + return 0; + + schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!schemes_arr) + return -ENOMEM; + schemes->schemes_arr = schemes_arr; + + for (i = 0; i < nr_schemes; i++) { + /* + * apply_interval_us as 0 means same to aggregation interval + * (same to before-apply_interval behavior) + */ + scheme = damon_sysfs_scheme_alloc(DAMOS_STAT, 0); + if (!scheme) { + damon_sysfs_schemes_rm_dirs(schemes); + return -ENOMEM; + } + + err = kobject_init_and_add(&scheme->kobj, + &damon_sysfs_scheme_ktype, &schemes->kobj, + "%d", i); + if (err) + goto out; + err = damon_sysfs_scheme_add_dirs(scheme); + if (err) + goto out; + + schemes_arr[i] = scheme; + schemes->nr++; + } + return 0; + +out: + damon_sysfs_schemes_rm_dirs(schemes); + kobject_put(&scheme->kobj); + return err; +} + +static ssize_t nr_schemes_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_schemes *schemes = container_of(kobj, + struct damon_sysfs_schemes, kobj); + + return sysfs_emit(buf, "%d\n", schemes->nr); +} + +static ssize_t nr_schemes_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_schemes *schemes; + int nr, err = kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + schemes = container_of(kobj, struct damon_sysfs_schemes, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_schemes_add_dirs(schemes, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + return count; +} + +static void damon_sysfs_schemes_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_schemes, kobj)); +} + +static struct kobj_attribute damon_sysfs_schemes_nr_attr = + __ATTR_RW_MODE(nr_schemes, 0600); + +static struct attribute *damon_sysfs_schemes_attrs[] = { + &damon_sysfs_schemes_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_schemes); + +const struct kobj_type damon_sysfs_schemes_ktype = { + .release = damon_sysfs_schemes_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_schemes_groups, +}; + +static bool damon_sysfs_memcg_path_eq(struct mem_cgroup *memcg, + char *memcg_path_buf, char *path) +{ +#ifdef CONFIG_MEMCG + cgroup_path(memcg->css.cgroup, memcg_path_buf, PATH_MAX); + if (sysfs_streq(memcg_path_buf, path)) + return true; +#endif /* CONFIG_MEMCG */ + return false; +} + +static int damon_sysfs_memcg_path_to_id(char *memcg_path, unsigned short *id) +{ + struct mem_cgroup *memcg; + char *path; + bool found = false; + + if (!memcg_path) + return -EINVAL; + + path = kmalloc_array(PATH_MAX, sizeof(*path), GFP_KERNEL); + if (!path) + return -ENOMEM; + + for (memcg = mem_cgroup_iter(NULL, NULL, NULL); memcg; + memcg = mem_cgroup_iter(NULL, memcg, NULL)) { + /* skip removed memcg */ + if (!mem_cgroup_id(memcg)) + continue; + if (damon_sysfs_memcg_path_eq(memcg, path, memcg_path)) { + *id = mem_cgroup_id(memcg); + found = true; + break; + } + } + + kfree(path); + return found ? 0 : -EINVAL; +} + +static int damon_sysfs_add_scheme_filters(struct damos *scheme, + struct damon_sysfs_scheme_filters *sysfs_filters) +{ + int i; + + for (i = 0; i < sysfs_filters->nr; i++) { + struct damon_sysfs_scheme_filter *sysfs_filter = + sysfs_filters->filters_arr[i]; + struct damos_filter *filter = + damos_new_filter(sysfs_filter->type, + sysfs_filter->matching, + sysfs_filter->allow); + int err; + + if (!filter) + return -ENOMEM; + if (filter->type == DAMOS_FILTER_TYPE_MEMCG) { + err = damon_sysfs_memcg_path_to_id( + sysfs_filter->memcg_path, + &filter->memcg_id); + if (err) { + damos_destroy_filter(filter); + return err; + } + } else if (filter->type == DAMOS_FILTER_TYPE_ADDR) { + if (sysfs_filter->addr_range.end < + sysfs_filter->addr_range.start) { + damos_destroy_filter(filter); + return -EINVAL; + } + filter->addr_range = sysfs_filter->addr_range; + } else if (filter->type == DAMOS_FILTER_TYPE_TARGET) { + filter->target_idx = sysfs_filter->target_idx; + } else if (filter->type == DAMOS_FILTER_TYPE_HUGEPAGE_SIZE) { + if (sysfs_filter->sz_range.min > + sysfs_filter->sz_range.max) { + damos_destroy_filter(filter); + return -EINVAL; + } + filter->sz_range = sysfs_filter->sz_range; + } + + damos_add_filter(scheme, filter); + } + return 0; +} + +static int damos_sysfs_add_quota_score( + struct damos_sysfs_quota_goals *sysfs_goals, + struct damos_quota *quota) +{ + struct damos_quota_goal *goal; + int i, err; + + for (i = 0; i < sysfs_goals->nr; i++) { + struct damos_sysfs_quota_goal *sysfs_goal = + sysfs_goals->goals_arr[i]; + + if (!sysfs_goal->target_value) + continue; + + goal = damos_new_quota_goal(sysfs_goal->metric, + sysfs_goal->target_value); + if (!goal) + return -ENOMEM; + switch (sysfs_goal->metric) { + case DAMOS_QUOTA_USER_INPUT: + goal->current_value = sysfs_goal->current_value; + break; + case DAMOS_QUOTA_NODE_MEM_USED_BP: + case DAMOS_QUOTA_NODE_MEM_FREE_BP: + goal->nid = sysfs_goal->nid; + break; + case DAMOS_QUOTA_NODE_MEMCG_USED_BP: + case DAMOS_QUOTA_NODE_MEMCG_FREE_BP: + err = damon_sysfs_memcg_path_to_id( + sysfs_goal->path, &goal->memcg_id); + if (err) { + damos_destroy_quota_goal(goal); + return err; + } + goal->nid = sysfs_goal->nid; + break; + default: + break; + } + damos_add_quota_goal(quota, goal); + } + return 0; +} + +int damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) +{ + struct damos *scheme; + struct damos_quota quota = {}; + int i = 0; + + INIT_LIST_HEAD("a.goals); + damon_for_each_scheme(scheme, ctx) { + struct damon_sysfs_scheme *sysfs_scheme; + struct damos_quota_goal *g, *g_next; + int err; + + /* user could have removed the scheme sysfs dir */ + if (i >= sysfs_schemes->nr) + break; + + sysfs_scheme = sysfs_schemes->schemes_arr[i]; + err = damos_sysfs_add_quota_score(sysfs_scheme->quotas->goals, + "a); + if (err) { + damos_for_each_quota_goal_safe(g, g_next, "a) + damos_destroy_quota_goal(g); + return err; + } + err = damos_commit_quota_goals(&scheme->quota, "a); + damos_for_each_quota_goal_safe(g, g_next, "a) + damos_destroy_quota_goal(g); + if (err) + return err; + i++; + } + return 0; +} + +void damos_sysfs_update_effective_quotas( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) +{ + struct damos *scheme; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + struct damon_sysfs_quotas *sysfs_quotas; + + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + + sysfs_quotas = + sysfs_schemes->schemes_arr[schemes_idx++]->quotas; + sysfs_quotas->effective_sz = scheme->quota.esz; + } +} + +static int damos_sysfs_add_migrate_dest(struct damos *scheme, + struct damos_sysfs_dests *sysfs_dests) +{ + struct damos_migrate_dests *dests = &scheme->migrate_dests; + int i; + + dests->node_id_arr = kmalloc_array(sysfs_dests->nr, + sizeof(*dests->node_id_arr), GFP_KERNEL); + if (!dests->node_id_arr) + return -ENOMEM; + dests->weight_arr = kmalloc_array(sysfs_dests->nr, + sizeof(*dests->weight_arr), GFP_KERNEL); + if (!dests->weight_arr) + /* ->node_id_arr will be freed by scheme destruction */ + return -ENOMEM; + for (i = 0; i < sysfs_dests->nr; i++) { + dests->node_id_arr[i] = sysfs_dests->dests_arr[i]->id; + dests->weight_arr[i] = sysfs_dests->dests_arr[i]->weight; + } + dests->nr_dests = sysfs_dests->nr; + return 0; +} + +static struct damos *damon_sysfs_mk_scheme( + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + struct damos *scheme; + int err; + + struct damos_access_pattern pattern = { + .min_sz_region = access_pattern->sz->min, + .max_sz_region = access_pattern->sz->max, + .min_nr_accesses = access_pattern->nr_accesses->min, + .max_nr_accesses = access_pattern->nr_accesses->max, + .min_age_region = access_pattern->age->min, + .max_age_region = access_pattern->age->max, + }; + struct damos_quota quota = { + .ms = sysfs_quotas->ms, + .sz = sysfs_quotas->sz, + .reset_interval = sysfs_quotas->reset_interval_ms, + .weight_sz = sysfs_weights->sz, + .weight_nr_accesses = sysfs_weights->nr_accesses, + .weight_age = sysfs_weights->age, + }; + struct damos_watermarks wmarks = { + .metric = sysfs_wmarks->metric, + .interval = sysfs_wmarks->interval_us, + .high = sysfs_wmarks->high, + .mid = sysfs_wmarks->mid, + .low = sysfs_wmarks->low, + }; + + scheme = damon_new_scheme(&pattern, sysfs_scheme->action, + sysfs_scheme->apply_interval_us, "a, &wmarks, + sysfs_scheme->target_nid); + if (!scheme) + return NULL; + + err = damos_sysfs_add_quota_score(sysfs_quotas->goals, &scheme->quota); + if (err) { + damon_destroy_scheme(scheme); + return NULL; + } + + err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->core_filters); + if (err) { + damon_destroy_scheme(scheme); + return NULL; + } + err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->ops_filters); + if (err) { + damon_destroy_scheme(scheme); + return NULL; + } + err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->filters); + if (err) { + damon_destroy_scheme(scheme); + return NULL; + } + err = damos_sysfs_add_migrate_dest(scheme, sysfs_scheme->dests); + if (err) { + damon_destroy_scheme(scheme); + return NULL; + } + return scheme; +} + +int damon_sysfs_add_schemes(struct damon_ctx *ctx, + struct damon_sysfs_schemes *sysfs_schemes) +{ + int i; + + for (i = 0; i < sysfs_schemes->nr; i++) { + struct damos *scheme, *next; + + scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); + if (!scheme) { + damon_for_each_scheme_safe(scheme, next, ctx) + damon_destroy_scheme(scheme); + return -ENOMEM; + } + damon_add_scheme(ctx, scheme); + } + return 0; +} + +void damon_sysfs_schemes_update_stats( + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) +{ + struct damos *scheme; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + struct damon_sysfs_stats *sysfs_stats; + + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + + sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; + sysfs_stats->nr_tried = scheme->stat.nr_tried; + sysfs_stats->sz_tried = scheme->stat.sz_tried; + sysfs_stats->nr_applied = scheme->stat.nr_applied; + sysfs_stats->sz_applied = scheme->stat.sz_applied; + sysfs_stats->sz_ops_filter_passed = + scheme->stat.sz_ops_filter_passed; + sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds; + } +} + +/** + * damos_sysfs_populate_region_dir() - Populate a schemes tried region dir. + * @sysfs_schemes: Schemes directory to populate regions directory. + * @ctx: Corresponding DAMON context. + * @t: DAMON target of @r. + * @r: DAMON region to populate the directory for. + * @s: Corresponding scheme. + * @total_bytes_only: Whether the request is for bytes update only. + * @sz_filter_passed: Bytes of @r that passed filters of @s. + * + * Called from DAMOS walk callback while holding damon_sysfs_lock. + */ +void damos_sysfs_populate_region_dir(struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos *s, bool total_bytes_only, + unsigned long sz_filter_passed) +{ + struct damos *scheme; + struct damon_sysfs_scheme_regions *sysfs_regions; + struct damon_sysfs_scheme_region *region; + int schemes_idx = 0; + + damon_for_each_scheme(scheme, ctx) { + if (scheme == s) + break; + schemes_idx++; + } + + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + return; + + sysfs_regions = sysfs_schemes->schemes_arr[schemes_idx]->tried_regions; + sysfs_regions->total_bytes += r->ar.end - r->ar.start; + if (total_bytes_only) + return; + + region = damon_sysfs_scheme_region_alloc(r); + if (!region) + return; + region->sz_filter_passed = sz_filter_passed; + list_add_tail(®ion->list, &sysfs_regions->regions_list); + sysfs_regions->nr_regions++; + if (kobject_init_and_add(®ion->kobj, + &damon_sysfs_scheme_region_ktype, + &sysfs_regions->kobj, "%d", + sysfs_regions->nr_regions++)) { + kobject_put(®ion->kobj); + } +} + +int damon_sysfs_schemes_clear_regions( + struct damon_sysfs_schemes *sysfs_schemes) +{ + int i; + + for (i = 0; i < sysfs_schemes->nr; i++) { + struct damon_sysfs_scheme *sysfs_scheme; + + sysfs_scheme = sysfs_schemes->schemes_arr[i]; + damon_sysfs_scheme_regions_rm_dirs( + sysfs_scheme->tried_regions); + sysfs_scheme->tried_regions->total_bytes = 0; + } + return 0; +} diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c new file mode 100644 index 000000000000..e2bd2d7becdd --- /dev/null +++ b/mm/damon/sysfs.c @@ -0,0 +1,2108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON sysfs Interface + * + * Copyright (c) 2022 SeongJae Park <sj@kernel.org> + */ + +#include <linux/pid.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include "sysfs-common.h" + +/* + * init region directory + */ + +struct damon_sysfs_region { + struct kobject kobj; + struct damon_addr_range ar; +}; + +static struct damon_sysfs_region *damon_sysfs_region_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_region), GFP_KERNEL); +} + +static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_region *region = container_of(kobj, + struct damon_sysfs_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->ar.start); +} + +static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_region *region = container_of(kobj, + struct damon_sysfs_region, kobj); + int err = kstrtoul(buf, 0, ®ion->ar.start); + + return err ? err : count; +} + +static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_region *region = container_of(kobj, + struct damon_sysfs_region, kobj); + + return sysfs_emit(buf, "%lu\n", region->ar.end); +} + +static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_region *region = container_of(kobj, + struct damon_sysfs_region, kobj); + int err = kstrtoul(buf, 0, ®ion->ar.end); + + return err ? err : count; +} + +static void damon_sysfs_region_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_region, kobj)); +} + +static struct kobj_attribute damon_sysfs_region_start_attr = + __ATTR_RW_MODE(start, 0600); + +static struct kobj_attribute damon_sysfs_region_end_attr = + __ATTR_RW_MODE(end, 0600); + +static struct attribute *damon_sysfs_region_attrs[] = { + &damon_sysfs_region_start_attr.attr, + &damon_sysfs_region_end_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_region); + +static const struct kobj_type damon_sysfs_region_ktype = { + .release = damon_sysfs_region_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_region_groups, +}; + +/* + * init_regions directory + */ + +struct damon_sysfs_regions { + struct kobject kobj; + struct damon_sysfs_region **regions_arr; + int nr; +}; + +static struct damon_sysfs_regions *damon_sysfs_regions_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_regions), GFP_KERNEL); +} + +static void damon_sysfs_regions_rm_dirs(struct damon_sysfs_regions *regions) +{ + struct damon_sysfs_region **regions_arr = regions->regions_arr; + int i; + + for (i = 0; i < regions->nr; i++) + kobject_put(®ions_arr[i]->kobj); + regions->nr = 0; + kfree(regions_arr); + regions->regions_arr = NULL; +} + +static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions, + int nr_regions) +{ + struct damon_sysfs_region **regions_arr, *region; + int err, i; + + damon_sysfs_regions_rm_dirs(regions); + if (!nr_regions) + return 0; + + regions_arr = kmalloc_array(nr_regions, sizeof(*regions_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!regions_arr) + return -ENOMEM; + regions->regions_arr = regions_arr; + + for (i = 0; i < nr_regions; i++) { + region = damon_sysfs_region_alloc(); + if (!region) { + damon_sysfs_regions_rm_dirs(regions); + return -ENOMEM; + } + + err = kobject_init_and_add(®ion->kobj, + &damon_sysfs_region_ktype, ®ions->kobj, + "%d", i); + if (err) { + kobject_put(®ion->kobj); + damon_sysfs_regions_rm_dirs(regions); + return err; + } + + regions_arr[i] = region; + regions->nr++; + } + return 0; +} + +static ssize_t nr_regions_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_regions *regions = container_of(kobj, + struct damon_sysfs_regions, kobj); + + return sysfs_emit(buf, "%d\n", regions->nr); +} + +static ssize_t nr_regions_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_regions *regions; + int nr, err = kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + regions = container_of(kobj, struct damon_sysfs_regions, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_regions_add_dirs(regions, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + + return count; +} + +static void damon_sysfs_regions_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_regions, kobj)); +} + +static struct kobj_attribute damon_sysfs_regions_nr_attr = + __ATTR_RW_MODE(nr_regions, 0600); + +static struct attribute *damon_sysfs_regions_attrs[] = { + &damon_sysfs_regions_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_regions); + +static const struct kobj_type damon_sysfs_regions_ktype = { + .release = damon_sysfs_regions_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_regions_groups, +}; + +/* + * target directory + */ + +struct damon_sysfs_target { + struct kobject kobj; + struct damon_sysfs_regions *regions; + int pid; + bool obsolete; +}; + +static struct damon_sysfs_target *damon_sysfs_target_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_target), GFP_KERNEL); +} + +static int damon_sysfs_target_add_dirs(struct damon_sysfs_target *target) +{ + struct damon_sysfs_regions *regions = damon_sysfs_regions_alloc(); + int err; + + if (!regions) + return -ENOMEM; + + err = kobject_init_and_add(®ions->kobj, &damon_sysfs_regions_ktype, + &target->kobj, "regions"); + if (err) + kobject_put(®ions->kobj); + else + target->regions = regions; + return err; +} + +static void damon_sysfs_target_rm_dirs(struct damon_sysfs_target *target) +{ + damon_sysfs_regions_rm_dirs(target->regions); + kobject_put(&target->regions->kobj); +} + +static ssize_t pid_target_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_target *target = container_of(kobj, + struct damon_sysfs_target, kobj); + + return sysfs_emit(buf, "%d\n", target->pid); +} + +static ssize_t pid_target_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_target *target = container_of(kobj, + struct damon_sysfs_target, kobj); + int err = kstrtoint(buf, 0, &target->pid); + + if (err) + return -EINVAL; + return count; +} + +static ssize_t obsolete_target_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_target *target = container_of(kobj, + struct damon_sysfs_target, kobj); + + return sysfs_emit(buf, "%c\n", target->obsolete ? 'Y' : 'N'); +} + +static ssize_t obsolete_target_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_target *target = container_of(kobj, + struct damon_sysfs_target, kobj); + bool obsolete; + int err = kstrtobool(buf, &obsolete); + + if (err) + return err; + target->obsolete = obsolete; + return count; +} + +static void damon_sysfs_target_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_target, kobj)); +} + +static struct kobj_attribute damon_sysfs_target_pid_attr = + __ATTR_RW_MODE(pid_target, 0600); + +static struct kobj_attribute damon_sysfs_target_obsolete_attr = + __ATTR_RW_MODE(obsolete_target, 0600); + +static struct attribute *damon_sysfs_target_attrs[] = { + &damon_sysfs_target_pid_attr.attr, + &damon_sysfs_target_obsolete_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_target); + +static const struct kobj_type damon_sysfs_target_ktype = { + .release = damon_sysfs_target_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_target_groups, +}; + +/* + * targets directory + */ + +struct damon_sysfs_targets { + struct kobject kobj; + struct damon_sysfs_target **targets_arr; + int nr; +}; + +static struct damon_sysfs_targets *damon_sysfs_targets_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_targets), GFP_KERNEL); +} + +static void damon_sysfs_targets_rm_dirs(struct damon_sysfs_targets *targets) +{ + struct damon_sysfs_target **targets_arr = targets->targets_arr; + int i; + + for (i = 0; i < targets->nr; i++) { + damon_sysfs_target_rm_dirs(targets_arr[i]); + kobject_put(&targets_arr[i]->kobj); + } + targets->nr = 0; + kfree(targets_arr); + targets->targets_arr = NULL; +} + +static int damon_sysfs_targets_add_dirs(struct damon_sysfs_targets *targets, + int nr_targets) +{ + struct damon_sysfs_target **targets_arr, *target; + int err, i; + + damon_sysfs_targets_rm_dirs(targets); + if (!nr_targets) + return 0; + + targets_arr = kmalloc_array(nr_targets, sizeof(*targets_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!targets_arr) + return -ENOMEM; + targets->targets_arr = targets_arr; + + for (i = 0; i < nr_targets; i++) { + target = damon_sysfs_target_alloc(); + if (!target) { + damon_sysfs_targets_rm_dirs(targets); + return -ENOMEM; + } + + err = kobject_init_and_add(&target->kobj, + &damon_sysfs_target_ktype, &targets->kobj, + "%d", i); + if (err) + goto out; + + err = damon_sysfs_target_add_dirs(target); + if (err) + goto out; + + targets_arr[i] = target; + targets->nr++; + } + return 0; + +out: + damon_sysfs_targets_rm_dirs(targets); + kobject_put(&target->kobj); + return err; +} + +static ssize_t nr_targets_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_targets *targets = container_of(kobj, + struct damon_sysfs_targets, kobj); + + return sysfs_emit(buf, "%d\n", targets->nr); +} + +static ssize_t nr_targets_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_targets *targets; + int nr, err = kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + targets = container_of(kobj, struct damon_sysfs_targets, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_targets_add_dirs(targets, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + + return count; +} + +static void damon_sysfs_targets_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_targets, kobj)); +} + +static struct kobj_attribute damon_sysfs_targets_nr_attr = + __ATTR_RW_MODE(nr_targets, 0600); + +static struct attribute *damon_sysfs_targets_attrs[] = { + &damon_sysfs_targets_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_targets); + +static const struct kobj_type damon_sysfs_targets_ktype = { + .release = damon_sysfs_targets_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_targets_groups, +}; + +/* + * intervals goal directory + */ + +struct damon_sysfs_intervals_goal { + struct kobject kobj; + unsigned long access_bp; + unsigned long aggrs; + unsigned long min_sample_us; + unsigned long max_sample_us; +}; + +static struct damon_sysfs_intervals_goal *damon_sysfs_intervals_goal_alloc( + unsigned long access_bp, unsigned long aggrs, + unsigned long min_sample_us, unsigned long max_sample_us) +{ + struct damon_sysfs_intervals_goal *goal = kmalloc(sizeof(*goal), + GFP_KERNEL); + + if (!goal) + return NULL; + + goal->kobj = (struct kobject){}; + goal->access_bp = access_bp; + goal->aggrs = aggrs; + goal->min_sample_us = min_sample_us; + goal->max_sample_us = max_sample_us; + return goal; +} + +static ssize_t access_bp_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->access_bp); +} + +static ssize_t access_bp_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + unsigned long nr; + int err = kstrtoul(buf, 0, &nr); + + if (err) + return err; + + goal->access_bp = nr; + return count; +} + +static ssize_t aggrs_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->aggrs); +} + +static ssize_t aggrs_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + unsigned long nr; + int err = kstrtoul(buf, 0, &nr); + + if (err) + return err; + + goal->aggrs = nr; + return count; +} + +static ssize_t min_sample_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->min_sample_us); +} + +static ssize_t min_sample_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + unsigned long nr; + int err = kstrtoul(buf, 0, &nr); + + if (err) + return err; + + goal->min_sample_us = nr; + return count; +} + +static ssize_t max_sample_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + + return sysfs_emit(buf, "%lu\n", goal->max_sample_us); +} + +static ssize_t max_sample_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals_goal *goal = container_of(kobj, + struct damon_sysfs_intervals_goal, kobj); + unsigned long nr; + int err = kstrtoul(buf, 0, &nr); + + if (err) + return err; + + goal->max_sample_us = nr; + return count; +} + +static void damon_sysfs_intervals_goal_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_intervals_goal, kobj)); +} + +static struct kobj_attribute damon_sysfs_intervals_goal_access_bp_attr = + __ATTR_RW_MODE(access_bp, 0600); + +static struct kobj_attribute damon_sysfs_intervals_goal_aggrs_attr = + __ATTR_RW_MODE(aggrs, 0600); + +static struct kobj_attribute damon_sysfs_intervals_goal_min_sample_us_attr = + __ATTR_RW_MODE(min_sample_us, 0600); + +static struct kobj_attribute damon_sysfs_intervals_goal_max_sample_us_attr = + __ATTR_RW_MODE(max_sample_us, 0600); + +static struct attribute *damon_sysfs_intervals_goal_attrs[] = { + &damon_sysfs_intervals_goal_access_bp_attr.attr, + &damon_sysfs_intervals_goal_aggrs_attr.attr, + &damon_sysfs_intervals_goal_min_sample_us_attr.attr, + &damon_sysfs_intervals_goal_max_sample_us_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_intervals_goal); + +static const struct kobj_type damon_sysfs_intervals_goal_ktype = { + .release = damon_sysfs_intervals_goal_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_intervals_goal_groups, +}; + +/* + * intervals directory + */ + +struct damon_sysfs_intervals { + struct kobject kobj; + unsigned long sample_us; + unsigned long aggr_us; + unsigned long update_us; + struct damon_sysfs_intervals_goal *intervals_goal; +}; + +static struct damon_sysfs_intervals *damon_sysfs_intervals_alloc( + unsigned long sample_us, unsigned long aggr_us, + unsigned long update_us) +{ + struct damon_sysfs_intervals *intervals = kmalloc(sizeof(*intervals), + GFP_KERNEL); + + if (!intervals) + return NULL; + + intervals->kobj = (struct kobject){}; + intervals->sample_us = sample_us; + intervals->aggr_us = aggr_us; + intervals->update_us = update_us; + return intervals; +} + +static int damon_sysfs_intervals_add_dirs(struct damon_sysfs_intervals *intervals) +{ + struct damon_sysfs_intervals_goal *goal; + int err; + + goal = damon_sysfs_intervals_goal_alloc(0, 0, 0, 0); + if (!goal) + return -ENOMEM; + + err = kobject_init_and_add(&goal->kobj, + &damon_sysfs_intervals_goal_ktype, &intervals->kobj, + "intervals_goal"); + if (err) { + kobject_put(&goal->kobj); + intervals->intervals_goal = NULL; + return err; + } + intervals->intervals_goal = goal; + return 0; +} + +static void damon_sysfs_intervals_rm_dirs(struct damon_sysfs_intervals *intervals) +{ + kobject_put(&intervals->intervals_goal->kobj); +} + +static ssize_t sample_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals *intervals = container_of(kobj, + struct damon_sysfs_intervals, kobj); + + return sysfs_emit(buf, "%lu\n", intervals->sample_us); +} + +static ssize_t sample_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals *intervals = container_of(kobj, + struct damon_sysfs_intervals, kobj); + unsigned long us; + int err = kstrtoul(buf, 0, &us); + + if (err) + return err; + + intervals->sample_us = us; + return count; +} + +static ssize_t aggr_us_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_intervals *intervals = container_of(kobj, + struct damon_sysfs_intervals, kobj); + + return sysfs_emit(buf, "%lu\n", intervals->aggr_us); +} + +static ssize_t aggr_us_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_intervals *intervals = container_of(kobj, + struct damon_sysfs_intervals, kobj); + unsigned long us; + int err = kstrtoul(buf, 0, &us); + + if (err) + return err; + + intervals->aggr_us = us; + return count; +} + +static ssize_t update_us_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_intervals *intervals = container_of(kobj, + struct damon_sysfs_intervals, kobj); + + return sysfs_emit(buf, "%lu\n", intervals->update_us); +} + +static ssize_t update_us_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_intervals *intervals = container_of(kobj, + struct damon_sysfs_intervals, kobj); + unsigned long us; + int err = kstrtoul(buf, 0, &us); + + if (err) + return err; + + intervals->update_us = us; + return count; +} + +static void damon_sysfs_intervals_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_intervals, kobj)); +} + +static struct kobj_attribute damon_sysfs_intervals_sample_us_attr = + __ATTR_RW_MODE(sample_us, 0600); + +static struct kobj_attribute damon_sysfs_intervals_aggr_us_attr = + __ATTR_RW_MODE(aggr_us, 0600); + +static struct kobj_attribute damon_sysfs_intervals_update_us_attr = + __ATTR_RW_MODE(update_us, 0600); + +static struct attribute *damon_sysfs_intervals_attrs[] = { + &damon_sysfs_intervals_sample_us_attr.attr, + &damon_sysfs_intervals_aggr_us_attr.attr, + &damon_sysfs_intervals_update_us_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_intervals); + +static const struct kobj_type damon_sysfs_intervals_ktype = { + .release = damon_sysfs_intervals_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_intervals_groups, +}; + +/* + * monitoring_attrs directory + */ + +struct damon_sysfs_attrs { + struct kobject kobj; + struct damon_sysfs_intervals *intervals; + struct damon_sysfs_ul_range *nr_regions_range; +}; + +static struct damon_sysfs_attrs *damon_sysfs_attrs_alloc(void) +{ + struct damon_sysfs_attrs *attrs = kmalloc(sizeof(*attrs), GFP_KERNEL); + + if (!attrs) + return NULL; + attrs->kobj = (struct kobject){}; + return attrs; +} + +static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs) +{ + struct damon_sysfs_intervals *intervals; + struct damon_sysfs_ul_range *nr_regions_range; + int err; + + intervals = damon_sysfs_intervals_alloc(5000, 100000, 60000000); + if (!intervals) + return -ENOMEM; + + err = kobject_init_and_add(&intervals->kobj, + &damon_sysfs_intervals_ktype, &attrs->kobj, + "intervals"); + if (err) + goto put_intervals_out; + err = damon_sysfs_intervals_add_dirs(intervals); + if (err) + goto put_intervals_out; + attrs->intervals = intervals; + + nr_regions_range = damon_sysfs_ul_range_alloc(10, 1000); + if (!nr_regions_range) { + err = -ENOMEM; + goto put_intervals_out; + } + + err = kobject_init_and_add(&nr_regions_range->kobj, + &damon_sysfs_ul_range_ktype, &attrs->kobj, + "nr_regions"); + if (err) + goto put_nr_regions_intervals_out; + attrs->nr_regions_range = nr_regions_range; + return 0; + +put_nr_regions_intervals_out: + kobject_put(&nr_regions_range->kobj); + attrs->nr_regions_range = NULL; +put_intervals_out: + kobject_put(&intervals->kobj); + attrs->intervals = NULL; + return err; +} + +static void damon_sysfs_attrs_rm_dirs(struct damon_sysfs_attrs *attrs) +{ + kobject_put(&attrs->nr_regions_range->kobj); + damon_sysfs_intervals_rm_dirs(attrs->intervals); + kobject_put(&attrs->intervals->kobj); +} + +static void damon_sysfs_attrs_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_attrs, kobj)); +} + +static struct attribute *damon_sysfs_attrs_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_attrs); + +static const struct kobj_type damon_sysfs_attrs_ktype = { + .release = damon_sysfs_attrs_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_attrs_groups, +}; + +/* + * context directory + */ + +struct damon_sysfs_ops_name { + enum damon_ops_id ops_id; + char *name; +}; + +static const struct damon_sysfs_ops_name damon_sysfs_ops_names[] = { + { + .ops_id = DAMON_OPS_VADDR, + .name = "vaddr", + }, + { + .ops_id = DAMON_OPS_FVADDR, + .name = "fvaddr", + }, + { + .ops_id = DAMON_OPS_PADDR, + .name = "paddr", + }, +}; + +struct damon_sysfs_context { + struct kobject kobj; + enum damon_ops_id ops_id; + unsigned long addr_unit; + struct damon_sysfs_attrs *attrs; + struct damon_sysfs_targets *targets; + struct damon_sysfs_schemes *schemes; +}; + +static struct damon_sysfs_context *damon_sysfs_context_alloc( + enum damon_ops_id ops_id) +{ + struct damon_sysfs_context *context = kmalloc(sizeof(*context), + GFP_KERNEL); + + if (!context) + return NULL; + context->kobj = (struct kobject){}; + context->ops_id = ops_id; + context->addr_unit = 1; + return context; +} + +static int damon_sysfs_context_set_attrs(struct damon_sysfs_context *context) +{ + struct damon_sysfs_attrs *attrs = damon_sysfs_attrs_alloc(); + int err; + + if (!attrs) + return -ENOMEM; + err = kobject_init_and_add(&attrs->kobj, &damon_sysfs_attrs_ktype, + &context->kobj, "monitoring_attrs"); + if (err) + goto out; + err = damon_sysfs_attrs_add_dirs(attrs); + if (err) + goto out; + context->attrs = attrs; + return 0; + +out: + kobject_put(&attrs->kobj); + return err; +} + +static int damon_sysfs_context_set_targets(struct damon_sysfs_context *context) +{ + struct damon_sysfs_targets *targets = damon_sysfs_targets_alloc(); + int err; + + if (!targets) + return -ENOMEM; + err = kobject_init_and_add(&targets->kobj, &damon_sysfs_targets_ktype, + &context->kobj, "targets"); + if (err) { + kobject_put(&targets->kobj); + return err; + } + context->targets = targets; + return 0; +} + +static int damon_sysfs_context_set_schemes(struct damon_sysfs_context *context) +{ + struct damon_sysfs_schemes *schemes = damon_sysfs_schemes_alloc(); + int err; + + if (!schemes) + return -ENOMEM; + err = kobject_init_and_add(&schemes->kobj, &damon_sysfs_schemes_ktype, + &context->kobj, "schemes"); + if (err) { + kobject_put(&schemes->kobj); + return err; + } + context->schemes = schemes; + return 0; +} + +static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context) +{ + int err; + + err = damon_sysfs_context_set_attrs(context); + if (err) + return err; + + err = damon_sysfs_context_set_targets(context); + if (err) + goto put_attrs_out; + + err = damon_sysfs_context_set_schemes(context); + if (err) + goto put_targets_attrs_out; + return 0; + +put_targets_attrs_out: + kobject_put(&context->targets->kobj); + context->targets = NULL; +put_attrs_out: + kobject_put(&context->attrs->kobj); + context->attrs = NULL; + return err; +} + +static void damon_sysfs_context_rm_dirs(struct damon_sysfs_context *context) +{ + damon_sysfs_attrs_rm_dirs(context->attrs); + kobject_put(&context->attrs->kobj); + damon_sysfs_targets_rm_dirs(context->targets); + kobject_put(&context->targets->kobj); + damon_sysfs_schemes_rm_dirs(context->schemes); + kobject_put(&context->schemes->kobj); +} + +static ssize_t avail_operations_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int len = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(damon_sysfs_ops_names); i++) { + const struct damon_sysfs_ops_name *ops_name; + + ops_name = &damon_sysfs_ops_names[i]; + if (!damon_is_registered_ops(ops_name->ops_id)) + continue; + len += sysfs_emit_at(buf, len, "%s\n", ops_name->name); + } + return len; +} + +static ssize_t operations_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_context *context = container_of(kobj, + struct damon_sysfs_context, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damon_sysfs_ops_names); i++) { + const struct damon_sysfs_ops_name *ops_name; + + ops_name = &damon_sysfs_ops_names[i]; + if (ops_name->ops_id == context->ops_id) + return sysfs_emit(buf, "%s\n", ops_name->name); + } + return -EINVAL; +} + +static ssize_t operations_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_context *context = container_of(kobj, + struct damon_sysfs_context, kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(damon_sysfs_ops_names); i++) { + const struct damon_sysfs_ops_name *ops_name; + + ops_name = &damon_sysfs_ops_names[i]; + if (sysfs_streq(buf, ops_name->name)) { + context->ops_id = ops_name->ops_id; + return count; + } + } + return -EINVAL; +} + +static ssize_t addr_unit_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_context *context = container_of(kobj, + struct damon_sysfs_context, kobj); + + return sysfs_emit(buf, "%lu\n", context->addr_unit); +} + +static ssize_t addr_unit_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_context *context = container_of(kobj, + struct damon_sysfs_context, kobj); + unsigned long input_addr_unit; + int err = kstrtoul(buf, 0, &input_addr_unit); + + if (err) + return err; + if (!input_addr_unit) + return -EINVAL; + + context->addr_unit = input_addr_unit; + return count; +} + +static void damon_sysfs_context_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_context, kobj)); +} + +static struct kobj_attribute damon_sysfs_context_avail_operations_attr = + __ATTR_RO_MODE(avail_operations, 0400); + +static struct kobj_attribute damon_sysfs_context_operations_attr = + __ATTR_RW_MODE(operations, 0600); + +static struct kobj_attribute damon_sysfs_context_addr_unit_attr = + __ATTR_RW_MODE(addr_unit, 0600); + +static struct attribute *damon_sysfs_context_attrs[] = { + &damon_sysfs_context_avail_operations_attr.attr, + &damon_sysfs_context_operations_attr.attr, + &damon_sysfs_context_addr_unit_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_context); + +static const struct kobj_type damon_sysfs_context_ktype = { + .release = damon_sysfs_context_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_context_groups, +}; + +/* + * contexts directory + */ + +struct damon_sysfs_contexts { + struct kobject kobj; + struct damon_sysfs_context **contexts_arr; + int nr; +}; + +static struct damon_sysfs_contexts *damon_sysfs_contexts_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_contexts), GFP_KERNEL); +} + +static void damon_sysfs_contexts_rm_dirs(struct damon_sysfs_contexts *contexts) +{ + struct damon_sysfs_context **contexts_arr = contexts->contexts_arr; + int i; + + for (i = 0; i < contexts->nr; i++) { + damon_sysfs_context_rm_dirs(contexts_arr[i]); + kobject_put(&contexts_arr[i]->kobj); + } + contexts->nr = 0; + kfree(contexts_arr); + contexts->contexts_arr = NULL; +} + +static int damon_sysfs_contexts_add_dirs(struct damon_sysfs_contexts *contexts, + int nr_contexts) +{ + struct damon_sysfs_context **contexts_arr, *context; + int err, i; + + damon_sysfs_contexts_rm_dirs(contexts); + if (!nr_contexts) + return 0; + + contexts_arr = kmalloc_array(nr_contexts, sizeof(*contexts_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!contexts_arr) + return -ENOMEM; + contexts->contexts_arr = contexts_arr; + + for (i = 0; i < nr_contexts; i++) { + context = damon_sysfs_context_alloc(DAMON_OPS_VADDR); + if (!context) { + damon_sysfs_contexts_rm_dirs(contexts); + return -ENOMEM; + } + + err = kobject_init_and_add(&context->kobj, + &damon_sysfs_context_ktype, &contexts->kobj, + "%d", i); + if (err) + goto out; + + err = damon_sysfs_context_add_dirs(context); + if (err) + goto out; + + contexts_arr[i] = context; + contexts->nr++; + } + return 0; + +out: + damon_sysfs_contexts_rm_dirs(contexts); + kobject_put(&context->kobj); + return err; +} + +static ssize_t nr_contexts_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_contexts *contexts = container_of(kobj, + struct damon_sysfs_contexts, kobj); + + return sysfs_emit(buf, "%d\n", contexts->nr); +} + +static ssize_t nr_contexts_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_contexts *contexts; + int nr, err; + + err = kstrtoint(buf, 0, &nr); + if (err) + return err; + /* TODO: support multiple contexts per kdamond */ + if (nr < 0 || 1 < nr) + return -EINVAL; + + contexts = container_of(kobj, struct damon_sysfs_contexts, kobj); + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_contexts_add_dirs(contexts, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + + return count; +} + +static void damon_sysfs_contexts_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_contexts, kobj)); +} + +static struct kobj_attribute damon_sysfs_contexts_nr_attr + = __ATTR_RW_MODE(nr_contexts, 0600); + +static struct attribute *damon_sysfs_contexts_attrs[] = { + &damon_sysfs_contexts_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_contexts); + +static const struct kobj_type damon_sysfs_contexts_ktype = { + .release = damon_sysfs_contexts_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_contexts_groups, +}; + +/* + * kdamond directory + */ + +struct damon_sysfs_kdamond { + struct kobject kobj; + struct damon_sysfs_contexts *contexts; + struct damon_ctx *damon_ctx; + unsigned int refresh_ms; +}; + +static struct damon_sysfs_kdamond *damon_sysfs_kdamond_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_kdamond), GFP_KERNEL); +} + +static int damon_sysfs_kdamond_add_dirs(struct damon_sysfs_kdamond *kdamond) +{ + struct damon_sysfs_contexts *contexts; + int err; + + contexts = damon_sysfs_contexts_alloc(); + if (!contexts) + return -ENOMEM; + + err = kobject_init_and_add(&contexts->kobj, + &damon_sysfs_contexts_ktype, &kdamond->kobj, + "contexts"); + if (err) { + kobject_put(&contexts->kobj); + return err; + } + kdamond->contexts = contexts; + + return err; +} + +static void damon_sysfs_kdamond_rm_dirs(struct damon_sysfs_kdamond *kdamond) +{ + damon_sysfs_contexts_rm_dirs(kdamond->contexts); + kobject_put(&kdamond->contexts->kobj); +} + +/* + * enum damon_sysfs_cmd - Commands for a specific kdamond. + */ +enum damon_sysfs_cmd { + /* @DAMON_SYSFS_CMD_ON: Turn the kdamond on. */ + DAMON_SYSFS_CMD_ON, + /* @DAMON_SYSFS_CMD_OFF: Turn the kdamond off. */ + DAMON_SYSFS_CMD_OFF, + /* @DAMON_SYSFS_CMD_COMMIT: Update kdamond inputs. */ + DAMON_SYSFS_CMD_COMMIT, + /* + * @DAMON_SYSFS_CMD_COMMIT_SCHEMES_QUOTA_GOALS: Commit the quota goals + * to DAMON. + */ + DAMON_SYSFS_CMD_COMMIT_SCHEMES_QUOTA_GOALS, + /* + * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS: Update scheme stats sysfs + * files. + */ + DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS, + /* + * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES: Update + * tried_regions/total_bytes sysfs files for each scheme. + */ + DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES, + /* + * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: Update schemes tried + * regions + */ + DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS, + /* + * @DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: Clear schemes tried + * regions + */ + DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS, + /* + * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS: Update the + * effective size quota of the scheme in bytes. + */ + DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS, + /* + * @DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS: Update the tuned monitoring + * intervals. + */ + DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS, + /* + * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands. + */ + NR_DAMON_SYSFS_CMDS, +}; + +/* Should match with enum damon_sysfs_cmd */ +static const char * const damon_sysfs_cmd_strs[] = { + "on", + "off", + "commit", + "commit_schemes_quota_goals", + "update_schemes_stats", + "update_schemes_tried_bytes", + "update_schemes_tried_regions", + "clear_schemes_tried_regions", + "update_schemes_effective_quotas", + "update_tuned_intervals", +}; + +static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_kdamond *kdamond = container_of(kobj, + struct damon_sysfs_kdamond, kobj); + struct damon_ctx *ctx; + bool running = false; + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + + ctx = kdamond->damon_ctx; + if (ctx) + running = damon_is_running(ctx); + + mutex_unlock(&damon_sysfs_lock); + + return sysfs_emit(buf, "%s\n", running ? + damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] : + damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_OFF]); +} + +static int damon_sysfs_set_attrs(struct damon_ctx *ctx, + struct damon_sysfs_attrs *sys_attrs) +{ + struct damon_sysfs_intervals *sys_intervals = sys_attrs->intervals; + struct damon_sysfs_intervals_goal *sys_goal = + sys_intervals->intervals_goal; + struct damon_sysfs_ul_range *sys_nr_regions = + sys_attrs->nr_regions_range; + struct damon_attrs attrs = { + .sample_interval = sys_intervals->sample_us, + .aggr_interval = sys_intervals->aggr_us, + .intervals_goal = { + .access_bp = sys_goal->access_bp, + .aggrs = sys_goal->aggrs, + .min_sample_us = sys_goal->min_sample_us, + .max_sample_us = sys_goal->max_sample_us}, + .ops_update_interval = sys_intervals->update_us, + .min_nr_regions = sys_nr_regions->min, + .max_nr_regions = sys_nr_regions->max, + }; + return damon_set_attrs(ctx, &attrs); +} + +static int damon_sysfs_set_regions(struct damon_target *t, + struct damon_sysfs_regions *sysfs_regions, + unsigned long min_sz_region) +{ + struct damon_addr_range *ranges = kmalloc_array(sysfs_regions->nr, + sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN); + int i, err = -EINVAL; + + if (!ranges) + return -ENOMEM; + for (i = 0; i < sysfs_regions->nr; i++) { + struct damon_sysfs_region *sys_region = + sysfs_regions->regions_arr[i]; + + if (sys_region->ar.start > sys_region->ar.end) + goto out; + + ranges[i].start = sys_region->ar.start; + ranges[i].end = sys_region->ar.end; + if (i == 0) + continue; + if (ranges[i - 1].end > ranges[i].start) + goto out; + } + err = damon_set_regions(t, ranges, sysfs_regions->nr, min_sz_region); +out: + kfree(ranges); + return err; + +} + +static int damon_sysfs_add_target(struct damon_sysfs_target *sys_target, + struct damon_ctx *ctx) +{ + struct damon_target *t = damon_new_target(); + + if (!t) + return -ENOMEM; + damon_add_target(ctx, t); + if (damon_target_has_pid(ctx)) { + t->pid = find_get_pid(sys_target->pid); + if (!t->pid) + /* caller will destroy targets */ + return -EINVAL; + } + t->obsolete = sys_target->obsolete; + return damon_sysfs_set_regions(t, sys_target->regions, ctx->min_sz_region); +} + +static int damon_sysfs_add_targets(struct damon_ctx *ctx, + struct damon_sysfs_targets *sysfs_targets) +{ + int i, err; + + /* Multiple physical address space monitoring targets makes no sense */ + if (ctx->ops.id == DAMON_OPS_PADDR && sysfs_targets->nr > 1) + return -EINVAL; + + for (i = 0; i < sysfs_targets->nr; i++) { + struct damon_sysfs_target *st = sysfs_targets->targets_arr[i]; + + err = damon_sysfs_add_target(st, ctx); + if (err) + return err; + } + return 0; +} + +/* + * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files. + * @data: The kobject wrapper that associated to the kdamond thread. + * + * This function reads the schemes stats of specific kdamond and update the + * related values for sysfs files. This function should be called from DAMON + * worker thread,to safely access the DAMON contexts-internal data. Caller + * should also ensure holding ``damon_syfs_lock``, and ->damon_ctx of @data is + * not NULL but a valid pointer, to safely access DAMON sysfs variables. + */ +static int damon_sysfs_upd_schemes_stats(void *data) +{ + struct damon_sysfs_kdamond *kdamond = data; + struct damon_ctx *ctx = kdamond->damon_ctx; + + damon_sysfs_schemes_update_stats( + kdamond->contexts->contexts_arr[0]->schemes, ctx); + return 0; +} + +static inline bool damon_sysfs_kdamond_running( + struct damon_sysfs_kdamond *kdamond) +{ + return kdamond->damon_ctx && + damon_is_running(kdamond->damon_ctx); +} + +static int damon_sysfs_apply_inputs(struct damon_ctx *ctx, + struct damon_sysfs_context *sys_ctx) +{ + int err; + + err = damon_select_ops(ctx, sys_ctx->ops_id); + if (err) + return err; + ctx->addr_unit = sys_ctx->addr_unit; + /* addr_unit is respected by only DAMON_OPS_PADDR */ + if (sys_ctx->ops_id == DAMON_OPS_PADDR) + ctx->min_sz_region = max( + DAMON_MIN_REGION / sys_ctx->addr_unit, 1); + err = damon_sysfs_set_attrs(ctx, sys_ctx->attrs); + if (err) + return err; + err = damon_sysfs_add_targets(ctx, sys_ctx->targets); + if (err) + return err; + return damon_sysfs_add_schemes(ctx, sys_ctx->schemes); +} + +static struct damon_ctx *damon_sysfs_build_ctx( + struct damon_sysfs_context *sys_ctx); + +/* + * Return a new damon_ctx for testing new parameters to commit. + */ +static struct damon_ctx *damon_sysfs_new_test_ctx( + struct damon_ctx *running_ctx) +{ + struct damon_ctx *test_ctx; + int err; + + test_ctx = damon_new_ctx(); + if (!test_ctx) + return NULL; + err = damon_commit_ctx(test_ctx, running_ctx); + if (err) { + damon_destroy_ctx(test_ctx); + return NULL; + } + return test_ctx; +} + +/* + * damon_sysfs_commit_input() - Commit user inputs to a running kdamond. + * @kdamond: The kobject wrapper for the associated kdamond. + * + * Returns error if the sysfs input is wrong. + */ +static int damon_sysfs_commit_input(void *data) +{ + struct damon_sysfs_kdamond *kdamond = data; + struct damon_ctx *param_ctx, *test_ctx; + int err; + + if (!damon_sysfs_kdamond_running(kdamond)) + return -EINVAL; + /* TODO: Support multiple contexts per kdamond */ + if (kdamond->contexts->nr != 1) + return -EINVAL; + + param_ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]); + if (IS_ERR(param_ctx)) + return PTR_ERR(param_ctx); + test_ctx = damon_sysfs_new_test_ctx(kdamond->damon_ctx); + if (!test_ctx) + return -ENOMEM; + err = damon_commit_ctx(test_ctx, param_ctx); + if (err) + goto out; + err = damon_commit_ctx(kdamond->damon_ctx, param_ctx); +out: + damon_destroy_ctx(test_ctx); + damon_destroy_ctx(param_ctx); + return err; +} + +static int damon_sysfs_commit_schemes_quota_goals(void *data) +{ + struct damon_sysfs_kdamond *sysfs_kdamond = data; + struct damon_ctx *ctx; + struct damon_sysfs_context *sysfs_ctx; + + if (!damon_sysfs_kdamond_running(sysfs_kdamond)) + return -EINVAL; + /* TODO: Support multiple contexts per kdamond */ + if (sysfs_kdamond->contexts->nr != 1) + return -EINVAL; + + ctx = sysfs_kdamond->damon_ctx; + sysfs_ctx = sysfs_kdamond->contexts->contexts_arr[0]; + return damos_sysfs_set_quota_scores(sysfs_ctx->schemes, ctx); +} + +/* + * damon_sysfs_upd_schemes_effective_quotas() - Update schemes effective quotas + * sysfs files. + * @data: The kobject wrapper that associated to the kdamond thread. + * + * This function reads the schemes' effective quotas of specific kdamond and + * update the related values for sysfs files. This function should be called + * from DAMON callbacks while holding ``damon_syfs_lock``, to safely access the + * DAMON contexts-internal data and DAMON sysfs variables. + */ +static int damon_sysfs_upd_schemes_effective_quotas(void *data) +{ + struct damon_sysfs_kdamond *kdamond = data; + struct damon_ctx *ctx = kdamond->damon_ctx; + + damos_sysfs_update_effective_quotas( + kdamond->contexts->contexts_arr[0]->schemes, ctx); + return 0; +} + +static int damon_sysfs_upd_tuned_intervals(void *data) +{ + struct damon_sysfs_kdamond *kdamond = data; + struct damon_ctx *ctx = kdamond->damon_ctx; + + kdamond->contexts->contexts_arr[0]->attrs->intervals->sample_us = + ctx->attrs.sample_interval; + kdamond->contexts->contexts_arr[0]->attrs->intervals->aggr_us = + ctx->attrs.aggr_interval; + return 0; +} + +static struct damon_ctx *damon_sysfs_build_ctx( + struct damon_sysfs_context *sys_ctx) +{ + struct damon_ctx *ctx = damon_new_ctx(); + int err; + + if (!ctx) + return ERR_PTR(-ENOMEM); + + err = damon_sysfs_apply_inputs(ctx, sys_ctx); + if (err) { + damon_destroy_ctx(ctx); + return ERR_PTR(err); + } + + return ctx; +} + +static unsigned long damon_sysfs_next_update_jiffies; + +static int damon_sysfs_repeat_call_fn(void *data) +{ + struct damon_sysfs_kdamond *sysfs_kdamond = data; + + if (!sysfs_kdamond->refresh_ms) + return 0; + if (time_before(jiffies, damon_sysfs_next_update_jiffies)) + return 0; + damon_sysfs_next_update_jiffies = jiffies + + msecs_to_jiffies(sysfs_kdamond->refresh_ms); + + if (!mutex_trylock(&damon_sysfs_lock)) + return 0; + damon_sysfs_upd_tuned_intervals(sysfs_kdamond); + damon_sysfs_upd_schemes_stats(sysfs_kdamond); + damon_sysfs_upd_schemes_effective_quotas(sysfs_kdamond); + mutex_unlock(&damon_sysfs_lock); + return 0; +} + +static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) +{ + struct damon_ctx *ctx; + struct damon_call_control *repeat_call_control; + int err; + + if (damon_sysfs_kdamond_running(kdamond)) + return -EBUSY; + /* TODO: support multiple contexts per kdamond */ + if (kdamond->contexts->nr != 1) + return -EINVAL; + + if (kdamond->damon_ctx) + damon_destroy_ctx(kdamond->damon_ctx); + kdamond->damon_ctx = NULL; + + repeat_call_control = kmalloc(sizeof(*repeat_call_control), + GFP_KERNEL); + if (!repeat_call_control) + return -ENOMEM; + + ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]); + if (IS_ERR(ctx)) { + kfree(repeat_call_control); + return PTR_ERR(ctx); + } + err = damon_start(&ctx, 1, false); + if (err) { + kfree(repeat_call_control); + damon_destroy_ctx(ctx); + return err; + } + kdamond->damon_ctx = ctx; + + damon_sysfs_next_update_jiffies = + jiffies + msecs_to_jiffies(kdamond->refresh_ms); + + repeat_call_control->fn = damon_sysfs_repeat_call_fn; + repeat_call_control->data = kdamond; + repeat_call_control->repeat = true; + repeat_call_control->dealloc_on_cancel = true; + damon_call(ctx, repeat_call_control); + return err; +} + +static int damon_sysfs_turn_damon_off(struct damon_sysfs_kdamond *kdamond) +{ + if (!kdamond->damon_ctx) + return -EINVAL; + return damon_stop(&kdamond->damon_ctx, 1); + /* + * To allow users show final monitoring results of already turned-off + * DAMON, we free kdamond->damon_ctx in next + * damon_sysfs_turn_damon_on(), or kdamonds_nr_store() + */ +} + +static int damon_sysfs_damon_call(int (*fn)(void *data), + struct damon_sysfs_kdamond *kdamond) +{ + struct damon_call_control call_control = {}; + int err; + + if (!kdamond->damon_ctx) + return -EINVAL; + call_control.fn = fn; + call_control.data = kdamond; + err = damon_call(kdamond->damon_ctx, &call_control); + return err ? err : call_control.return_code; +} + +struct damon_sysfs_schemes_walk_data { + struct damon_sysfs_kdamond *sysfs_kdamond; + bool total_bytes_only; +}; + +/* populate the region directory */ +static void damon_sysfs_schemes_tried_regions_upd_one(void *data, struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *s, unsigned long sz_filter_passed) +{ + struct damon_sysfs_schemes_walk_data *walk_data = data; + struct damon_sysfs_kdamond *sysfs_kdamond = walk_data->sysfs_kdamond; + + damos_sysfs_populate_region_dir( + sysfs_kdamond->contexts->contexts_arr[0]->schemes, + ctx, t, r, s, walk_data->total_bytes_only, + sz_filter_passed); +} + +static int damon_sysfs_update_schemes_tried_regions( + struct damon_sysfs_kdamond *sysfs_kdamond, bool total_bytes_only) +{ + struct damon_sysfs_schemes_walk_data walk_data = { + .sysfs_kdamond = sysfs_kdamond, + .total_bytes_only = total_bytes_only, + }; + struct damos_walk_control control = { + .walk_fn = damon_sysfs_schemes_tried_regions_upd_one, + .data = &walk_data, + }; + struct damon_ctx *ctx = sysfs_kdamond->damon_ctx; + + if (!ctx) + return -EINVAL; + + damon_sysfs_schemes_clear_regions( + sysfs_kdamond->contexts->contexts_arr[0]->schemes); + return damos_walk(ctx, &control); +} + +/* + * damon_sysfs_handle_cmd() - Handle a command for a specific kdamond. + * @cmd: The command to handle. + * @kdamond: The kobject wrapper for the associated kdamond. + * + * This function handles a DAMON sysfs command for a kdamond. + * + * Return: 0 on success, negative error code otherwise. + */ +static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd, + struct damon_sysfs_kdamond *kdamond) +{ + switch (cmd) { + case DAMON_SYSFS_CMD_ON: + return damon_sysfs_turn_damon_on(kdamond); + case DAMON_SYSFS_CMD_OFF: + return damon_sysfs_turn_damon_off(kdamond); + case DAMON_SYSFS_CMD_COMMIT: + return damon_sysfs_damon_call( + damon_sysfs_commit_input, kdamond); + case DAMON_SYSFS_CMD_COMMIT_SCHEMES_QUOTA_GOALS: + return damon_sysfs_damon_call( + damon_sysfs_commit_schemes_quota_goals, + kdamond); + case DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS: + return damon_sysfs_damon_call( + damon_sysfs_upd_schemes_stats, kdamond); + case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES: + return damon_sysfs_update_schemes_tried_regions(kdamond, true); + case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: + return damon_sysfs_update_schemes_tried_regions(kdamond, false); + case DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: + return damon_sysfs_schemes_clear_regions( + kdamond->contexts->contexts_arr[0]->schemes); + case DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS: + return damon_sysfs_damon_call( + damon_sysfs_upd_schemes_effective_quotas, + kdamond); + case DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS: + return damon_sysfs_damon_call( + damon_sysfs_upd_tuned_intervals, kdamond); + default: + return -EINVAL; + } +} + +static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_kdamond *kdamond = container_of(kobj, + struct damon_sysfs_kdamond, kobj); + enum damon_sysfs_cmd cmd; + ssize_t ret = -EINVAL; + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + for (cmd = 0; cmd < NR_DAMON_SYSFS_CMDS; cmd++) { + if (sysfs_streq(buf, damon_sysfs_cmd_strs[cmd])) { + ret = damon_sysfs_handle_cmd(cmd, kdamond); + break; + } + } + mutex_unlock(&damon_sysfs_lock); + if (!ret) + ret = count; + return ret; +} + +static ssize_t pid_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_kdamond *kdamond = container_of(kobj, + struct damon_sysfs_kdamond, kobj); + struct damon_ctx *ctx; + int pid = -1; + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + ctx = kdamond->damon_ctx; + if (!ctx) + goto out; + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) + pid = ctx->kdamond->pid; + mutex_unlock(&ctx->kdamond_lock); +out: + mutex_unlock(&damon_sysfs_lock); + return sysfs_emit(buf, "%d\n", pid); +} + +static ssize_t refresh_ms_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_kdamond *kdamond = container_of(kobj, + struct damon_sysfs_kdamond, kobj); + + return sysfs_emit(buf, "%u\n", kdamond->refresh_ms); +} + +static ssize_t refresh_ms_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_kdamond *kdamond = container_of(kobj, + struct damon_sysfs_kdamond, kobj); + unsigned int nr; + int err = kstrtouint(buf, 0, &nr); + + if (err) + return err; + + kdamond->refresh_ms = nr; + return count; +} + +static void damon_sysfs_kdamond_release(struct kobject *kobj) +{ + struct damon_sysfs_kdamond *kdamond = container_of(kobj, + struct damon_sysfs_kdamond, kobj); + + if (kdamond->damon_ctx) + damon_destroy_ctx(kdamond->damon_ctx); + kfree(kdamond); +} + +static struct kobj_attribute damon_sysfs_kdamond_state_attr = + __ATTR_RW_MODE(state, 0600); + +static struct kobj_attribute damon_sysfs_kdamond_pid_attr = + __ATTR_RO_MODE(pid, 0400); + +static struct kobj_attribute damon_sysfs_kdamond_refresh_ms_attr = + __ATTR_RW_MODE(refresh_ms, 0600); + +static struct attribute *damon_sysfs_kdamond_attrs[] = { + &damon_sysfs_kdamond_state_attr.attr, + &damon_sysfs_kdamond_pid_attr.attr, + &damon_sysfs_kdamond_refresh_ms_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_kdamond); + +static const struct kobj_type damon_sysfs_kdamond_ktype = { + .release = damon_sysfs_kdamond_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_kdamond_groups, +}; + +/* + * kdamonds directory + */ + +struct damon_sysfs_kdamonds { + struct kobject kobj; + struct damon_sysfs_kdamond **kdamonds_arr; + int nr; +}; + +static struct damon_sysfs_kdamonds *damon_sysfs_kdamonds_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_kdamonds), GFP_KERNEL); +} + +static void damon_sysfs_kdamonds_rm_dirs(struct damon_sysfs_kdamonds *kdamonds) +{ + struct damon_sysfs_kdamond **kdamonds_arr = kdamonds->kdamonds_arr; + int i; + + for (i = 0; i < kdamonds->nr; i++) { + damon_sysfs_kdamond_rm_dirs(kdamonds_arr[i]); + kobject_put(&kdamonds_arr[i]->kobj); + } + kdamonds->nr = 0; + kfree(kdamonds_arr); + kdamonds->kdamonds_arr = NULL; +} + +static bool damon_sysfs_kdamonds_busy(struct damon_sysfs_kdamond **kdamonds, + int nr_kdamonds) +{ + int i; + + for (i = 0; i < nr_kdamonds; i++) { + if (damon_sysfs_kdamond_running(kdamonds[i])) + return true; + } + + return false; +} + +static int damon_sysfs_kdamonds_add_dirs(struct damon_sysfs_kdamonds *kdamonds, + int nr_kdamonds) +{ + struct damon_sysfs_kdamond **kdamonds_arr, *kdamond; + int err, i; + + if (damon_sysfs_kdamonds_busy(kdamonds->kdamonds_arr, kdamonds->nr)) + return -EBUSY; + + damon_sysfs_kdamonds_rm_dirs(kdamonds); + if (!nr_kdamonds) + return 0; + + kdamonds_arr = kmalloc_array(nr_kdamonds, sizeof(*kdamonds_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!kdamonds_arr) + return -ENOMEM; + kdamonds->kdamonds_arr = kdamonds_arr; + + for (i = 0; i < nr_kdamonds; i++) { + kdamond = damon_sysfs_kdamond_alloc(); + if (!kdamond) { + damon_sysfs_kdamonds_rm_dirs(kdamonds); + return -ENOMEM; + } + + err = kobject_init_and_add(&kdamond->kobj, + &damon_sysfs_kdamond_ktype, &kdamonds->kobj, + "%d", i); + if (err) + goto out; + + err = damon_sysfs_kdamond_add_dirs(kdamond); + if (err) + goto out; + + kdamonds_arr[i] = kdamond; + kdamonds->nr++; + } + return 0; + +out: + damon_sysfs_kdamonds_rm_dirs(kdamonds); + kobject_put(&kdamond->kobj); + return err; +} + +static ssize_t nr_kdamonds_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_kdamonds *kdamonds = container_of(kobj, + struct damon_sysfs_kdamonds, kobj); + + return sysfs_emit(buf, "%d\n", kdamonds->nr); +} + +static ssize_t nr_kdamonds_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_kdamonds *kdamonds; + int nr, err; + + err = kstrtoint(buf, 0, &nr); + if (err) + return err; + if (nr < 0) + return -EINVAL; + + kdamonds = container_of(kobj, struct damon_sysfs_kdamonds, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_kdamonds_add_dirs(kdamonds, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + + return count; +} + +static void damon_sysfs_kdamonds_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_kdamonds, kobj)); +} + +static struct kobj_attribute damon_sysfs_kdamonds_nr_attr = + __ATTR_RW_MODE(nr_kdamonds, 0600); + +static struct attribute *damon_sysfs_kdamonds_attrs[] = { + &damon_sysfs_kdamonds_nr_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_kdamonds); + +static const struct kobj_type damon_sysfs_kdamonds_ktype = { + .release = damon_sysfs_kdamonds_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_kdamonds_groups, +}; + +/* + * damon user interface directory + */ + +struct damon_sysfs_ui_dir { + struct kobject kobj; + struct damon_sysfs_kdamonds *kdamonds; +}; + +static struct damon_sysfs_ui_dir *damon_sysfs_ui_dir_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_ui_dir), GFP_KERNEL); +} + +static int damon_sysfs_ui_dir_add_dirs(struct damon_sysfs_ui_dir *ui_dir) +{ + struct damon_sysfs_kdamonds *kdamonds; + int err; + + kdamonds = damon_sysfs_kdamonds_alloc(); + if (!kdamonds) + return -ENOMEM; + + err = kobject_init_and_add(&kdamonds->kobj, + &damon_sysfs_kdamonds_ktype, &ui_dir->kobj, + "kdamonds"); + if (err) { + kobject_put(&kdamonds->kobj); + return err; + } + ui_dir->kdamonds = kdamonds; + return err; +} + +static void damon_sysfs_ui_dir_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_ui_dir, kobj)); +} + +static struct attribute *damon_sysfs_ui_dir_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_ui_dir); + +static const struct kobj_type damon_sysfs_ui_dir_ktype = { + .release = damon_sysfs_ui_dir_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_ui_dir_groups, +}; + +static int __init damon_sysfs_init(void) +{ + struct kobject *damon_sysfs_root; + struct damon_sysfs_ui_dir *admin; + int err; + + damon_sysfs_root = kobject_create_and_add("damon", mm_kobj); + if (!damon_sysfs_root) + return -ENOMEM; + + admin = damon_sysfs_ui_dir_alloc(); + if (!admin) { + kobject_put(damon_sysfs_root); + return -ENOMEM; + } + err = kobject_init_and_add(&admin->kobj, &damon_sysfs_ui_dir_ktype, + damon_sysfs_root, "admin"); + if (err) + goto out; + err = damon_sysfs_ui_dir_add_dirs(admin); + if (err) + goto out; + return 0; + +out: + kobject_put(&admin->kobj); + kobject_put(damon_sysfs_root); + return err; +} +subsys_initcall(damon_sysfs_init); + +#include "tests/sysfs-kunit.h" diff --git a/mm/damon/tests/.kunitconfig b/mm/damon/tests/.kunitconfig new file mode 100644 index 000000000000..36a450f57b58 --- /dev/null +++ b/mm/damon/tests/.kunitconfig @@ -0,0 +1,15 @@ +# for DAMON core +CONFIG_KUNIT=y +CONFIG_DAMON=y +CONFIG_DAMON_KUNIT_TEST=y + +# for DAMON vaddr ops +CONFIG_MMU=y +CONFIG_PAGE_IDLE_FLAG=y +CONFIG_DAMON_VADDR=y +CONFIG_DAMON_VADDR_KUNIT_TEST=y + +# for DAMON sysfs interface +CONFIG_SYSFS=y +CONFIG_DAMON_SYSFS=y +CONFIG_DAMON_SYSFS_KUNIT_TEST=y diff --git a/mm/damon/tests/core-kunit.h b/mm/damon/tests/core-kunit.h new file mode 100644 index 000000000000..a1eff023e928 --- /dev/null +++ b/mm/damon/tests/core-kunit.h @@ -0,0 +1,1249 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#ifdef CONFIG_DAMON_KUNIT_TEST + +#ifndef _DAMON_CORE_TEST_H +#define _DAMON_CORE_TEST_H + +#include <kunit/test.h> + +static void damon_test_regions(struct kunit *test) +{ + struct damon_region *r; + struct damon_target *t; + + r = damon_new_region(1, 2); + if (!r) + kunit_skip(test, "region alloc fail"); + KUNIT_EXPECT_EQ(test, 1ul, r->ar.start); + KUNIT_EXPECT_EQ(test, 2ul, r->ar.end); + KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses); + + t = damon_new_target(); + if (!t) { + damon_free_region(r); + kunit_skip(test, "target alloc fail"); + } + KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t)); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, 1u, damon_nr_regions(t)); + + damon_destroy_region(r, t); + KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t)); + + damon_free_target(t); +} + +static unsigned int nr_damon_targets(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_targets = 0; + + damon_for_each_target(t, ctx) + nr_targets++; + + return nr_targets; +} + +static void damon_test_target(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + + if (!c) + kunit_skip(test, "ctx alloc fail"); + + t = damon_new_target(); + if (!t) { + damon_destroy_ctx(c); + kunit_skip(test, "target alloc fail"); + } + KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c)); + + damon_add_target(c, t); + KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(c)); + + damon_destroy_target(t, c); + KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c)); + + damon_destroy_ctx(c); +} + +/* + * Test kdamond_reset_aggregated() + * + * DAMON checks access to each region and aggregates this information as the + * access frequency of each region. In detail, it increases '->nr_accesses' of + * regions that an access has confirmed. 'kdamond_reset_aggregated()' flushes + * the aggregated information ('->nr_accesses' of each regions) to the result + * buffer. As a result of the flushing, the '->nr_accesses' of regions are + * initialized to zero. + */ +static void damon_test_aggregate(struct kunit *test) +{ + struct damon_ctx *ctx = damon_new_ctx(); + unsigned long saddr[][3] = {{10, 20, 30}, {5, 42, 49}, {13, 33, 55} }; + unsigned long eaddr[][3] = {{15, 27, 40}, {31, 45, 55}, {23, 44, 66} }; + unsigned long accesses[][3] = {{42, 95, 84}, {10, 20, 30}, {0, 1, 2} }; + struct damon_target *t; + struct damon_region *r; + int it, ir; + + if (!ctx) + kunit_skip(test, "ctx alloc fail"); + + for (it = 0; it < 3; it++) { + t = damon_new_target(); + if (!t) { + damon_destroy_ctx(ctx); + kunit_skip(test, "target alloc fail"); + } + damon_add_target(ctx, t); + } + + it = 0; + damon_for_each_target(t, ctx) { + for (ir = 0; ir < 3; ir++) { + r = damon_new_region(saddr[it][ir], eaddr[it][ir]); + if (!r) { + damon_destroy_ctx(ctx); + kunit_skip(test, "region alloc fail"); + } + r->nr_accesses = accesses[it][ir]; + r->nr_accesses_bp = accesses[it][ir] * 10000; + damon_add_region(r, t); + } + it++; + } + kdamond_reset_aggregated(ctx); + it = 0; + damon_for_each_target(t, ctx) { + ir = 0; + /* '->nr_accesses' should be zeroed */ + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses); + ir++; + } + /* regions should be preserved */ + KUNIT_EXPECT_EQ(test, 3, ir); + it++; + } + /* targets also should be preserved */ + KUNIT_EXPECT_EQ(test, 3, it); + + damon_destroy_ctx(ctx); +} + +static void damon_test_split_at(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r, *r_new; + + t = damon_new_target(); + if (!t) + kunit_skip(test, "target alloc fail"); + r = damon_new_region(0, 100); + if (!r) { + damon_free_target(t); + kunit_skip(test, "region alloc fail"); + } + r->nr_accesses_bp = 420000; + r->nr_accesses = 42; + r->last_nr_accesses = 15; + damon_add_region(r, t); + damon_split_region_at(t, r, 25); + KUNIT_EXPECT_EQ(test, r->ar.start, 0ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 25ul); + + r_new = damon_next_region(r); + KUNIT_EXPECT_EQ(test, r_new->ar.start, 25ul); + KUNIT_EXPECT_EQ(test, r_new->ar.end, 100ul); + + KUNIT_EXPECT_EQ(test, r->nr_accesses_bp, r_new->nr_accesses_bp); + KUNIT_EXPECT_EQ(test, r->nr_accesses, r_new->nr_accesses); + KUNIT_EXPECT_EQ(test, r->last_nr_accesses, r_new->last_nr_accesses); + + damon_free_target(t); +} + +static void damon_test_merge_two(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r, *r2, *r3; + int i; + + t = damon_new_target(); + if (!t) + kunit_skip(test, "target alloc fail"); + r = damon_new_region(0, 100); + if (!r) { + damon_free_target(t); + kunit_skip(test, "region alloc fail"); + } + r->nr_accesses = 10; + r->nr_accesses_bp = 100000; + damon_add_region(r, t); + r2 = damon_new_region(100, 300); + if (!r2) { + damon_free_target(t); + kunit_skip(test, "second region alloc fail"); + } + r2->nr_accesses = 20; + r2->nr_accesses_bp = 200000; + damon_add_region(r2, t); + + damon_merge_two_regions(t, r, r2); + KUNIT_EXPECT_EQ(test, r->ar.start, 0ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 300ul); + KUNIT_EXPECT_EQ(test, r->nr_accesses, 16u); + + i = 0; + damon_for_each_region(r3, t) { + KUNIT_EXPECT_PTR_EQ(test, r, r3); + i++; + } + KUNIT_EXPECT_EQ(test, i, 1); + + damon_free_target(t); +} + +static struct damon_region *__nth_region_of(struct damon_target *t, int idx) +{ + struct damon_region *r; + unsigned int i = 0; + + damon_for_each_region(r, t) { + if (i++ == idx) + return r; + } + + return NULL; +} + +static void damon_test_merge_regions_of(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sa[] = {0, 100, 114, 122, 130, 156, 170, 184}; + unsigned long ea[] = {100, 112, 122, 130, 156, 170, 184, 230}; + unsigned int nrs[] = {0, 0, 10, 10, 20, 30, 1, 2}; + + unsigned long saddrs[] = {0, 114, 130, 156, 170}; + unsigned long eaddrs[] = {112, 130, 156, 170, 230}; + int i; + + t = damon_new_target(); + if (!t) + kunit_skip(test, "target alloc fail"); + for (i = 0; i < ARRAY_SIZE(sa); i++) { + r = damon_new_region(sa[i], ea[i]); + if (!r) { + damon_free_target(t); + kunit_skip(test, "region alloc fail"); + } + r->nr_accesses = nrs[i]; + r->nr_accesses_bp = nrs[i] * 10000; + damon_add_region(r, t); + } + + damon_merge_regions_of(t, 9, 9999); + /* 0-112, 114-130, 130-156, 156-170 */ + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u); + for (i = 0; i < 5; i++) { + r = __nth_region_of(t, i); + KUNIT_EXPECT_EQ(test, r->ar.start, saddrs[i]); + KUNIT_EXPECT_EQ(test, r->ar.end, eaddrs[i]); + } + damon_free_target(t); +} + +static void damon_test_split_regions_of(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r; + + t = damon_new_target(); + if (!t) + kunit_skip(test, "target alloc fail"); + r = damon_new_region(0, 22); + if (!r) { + damon_free_target(t); + kunit_skip(test, "region alloc fail"); + } + damon_add_region(r, t); + damon_split_regions_of(t, 2, 1); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u); + damon_free_target(t); + + t = damon_new_target(); + if (!t) + kunit_skip(test, "second target alloc fail"); + r = damon_new_region(0, 220); + if (!r) { + damon_free_target(t); + kunit_skip(test, "second region alloc fail"); + } + damon_add_region(r, t); + damon_split_regions_of(t, 4, 1); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u); + damon_free_target(t); +} + +static void damon_test_ops_registration(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_operations ops = {.id = DAMON_OPS_VADDR}, bak; + bool need_cleanup = false; + + if (!c) + kunit_skip(test, "ctx alloc fail"); + + /* DAMON_OPS_VADDR is registered only if CONFIG_DAMON_VADDR is set */ + if (!damon_is_registered_ops(DAMON_OPS_VADDR)) { + bak.id = DAMON_OPS_VADDR; + KUNIT_EXPECT_EQ(test, damon_register_ops(&bak), 0); + need_cleanup = true; + } + + /* DAMON_OPS_VADDR is ensured to be registered */ + KUNIT_EXPECT_EQ(test, damon_select_ops(c, DAMON_OPS_VADDR), 0); + + /* Double-registration is prohibited */ + KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), -EINVAL); + + /* Unknown ops id cannot be registered */ + KUNIT_EXPECT_EQ(test, damon_select_ops(c, NR_DAMON_OPS), -EINVAL); + + /* Registration should success after unregistration */ + mutex_lock(&damon_ops_lock); + bak = damon_registered_ops[DAMON_OPS_VADDR]; + damon_registered_ops[DAMON_OPS_VADDR] = (struct damon_operations){}; + mutex_unlock(&damon_ops_lock); + + ops.id = DAMON_OPS_VADDR; + KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), 0); + + mutex_lock(&damon_ops_lock); + damon_registered_ops[DAMON_OPS_VADDR] = bak; + mutex_unlock(&damon_ops_lock); + + /* Check double-registration failure again */ + KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), -EINVAL); + + damon_destroy_ctx(c); + + if (need_cleanup) { + mutex_lock(&damon_ops_lock); + damon_registered_ops[DAMON_OPS_VADDR] = + (struct damon_operations){}; + mutex_unlock(&damon_ops_lock); + } +} + +static void damon_test_set_regions(struct kunit *test) +{ + struct damon_target *t = damon_new_target(); + struct damon_region *r1, *r2; + struct damon_addr_range range = {.start = 8, .end = 28}; + unsigned long expects[] = {8, 16, 16, 24, 24, 28}; + int expect_idx = 0; + struct damon_region *r; + + if (!t) + kunit_skip(test, "target alloc fail"); + r1 = damon_new_region(4, 16); + if (!r1) { + damon_free_target(t); + kunit_skip(test, "region alloc fail"); + } + r2 = damon_new_region(24, 32); + if (!r2) { + damon_free_target(t); + damon_free_region(r1); + kunit_skip(test, "second region alloc fail"); + } + + damon_add_region(r1, t); + damon_add_region(r2, t); + damon_set_regions(t, &range, 1, 1); + + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 3); + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, r->ar.start, expects[expect_idx++]); + KUNIT_EXPECT_EQ(test, r->ar.end, expects[expect_idx++]); + } + damon_destroy_target(t, NULL); +} + +static void damon_test_nr_accesses_to_accesses_bp(struct kunit *test) +{ + struct damon_attrs attrs = { + .sample_interval = 10, + .aggr_interval = ((unsigned long)UINT_MAX + 1) * 10 + }; + + /* + * In some cases such as 32bit architectures where UINT_MAX is + * ULONG_MAX, attrs.aggr_interval becomes zero. Calling + * damon_nr_accesses_to_accesses_bp() in the case will cause + * divide-by-zero. Such case is prohibited in normal execution since + * the caution is documented on the comment for the function, and + * damon_update_monitoring_results() does the check. Skip the test in + * the case. + */ + if (!attrs.aggr_interval) + kunit_skip(test, "aggr_interval is zero."); + + KUNIT_EXPECT_EQ(test, damon_nr_accesses_to_accesses_bp(123, &attrs), 0); +} + +static void damon_test_update_monitoring_result(struct kunit *test) +{ + struct damon_attrs old_attrs = { + .sample_interval = 10, .aggr_interval = 1000,}; + struct damon_attrs new_attrs; + struct damon_region *r = damon_new_region(3, 7); + + if (!r) + kunit_skip(test, "region alloc fail"); + + r->nr_accesses = 15; + r->nr_accesses_bp = 150000; + r->age = 20; + + new_attrs = (struct damon_attrs){ + .sample_interval = 100, .aggr_interval = 10000,}; + damon_update_monitoring_result(r, &old_attrs, &new_attrs, false); + KUNIT_EXPECT_EQ(test, r->nr_accesses, 15); + KUNIT_EXPECT_EQ(test, r->age, 2); + + new_attrs = (struct damon_attrs){ + .sample_interval = 1, .aggr_interval = 1000}; + damon_update_monitoring_result(r, &old_attrs, &new_attrs, false); + KUNIT_EXPECT_EQ(test, r->nr_accesses, 150); + KUNIT_EXPECT_EQ(test, r->age, 2); + + new_attrs = (struct damon_attrs){ + .sample_interval = 1, .aggr_interval = 100}; + damon_update_monitoring_result(r, &old_attrs, &new_attrs, false); + KUNIT_EXPECT_EQ(test, r->nr_accesses, 150); + KUNIT_EXPECT_EQ(test, r->age, 20); + + damon_free_region(r); +} + +static void damon_test_set_attrs(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_attrs valid_attrs = { + .min_nr_regions = 10, .max_nr_regions = 1000, + .sample_interval = 5000, .aggr_interval = 100000,}; + struct damon_attrs invalid_attrs; + + if (!c) + kunit_skip(test, "ctx alloc fail"); + + KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &valid_attrs), 0); + + invalid_attrs = valid_attrs; + invalid_attrs.min_nr_regions = 1; + KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL); + + invalid_attrs = valid_attrs; + invalid_attrs.max_nr_regions = 9; + KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL); + + invalid_attrs = valid_attrs; + invalid_attrs.aggr_interval = 4999; + KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL); + + damon_destroy_ctx(c); +} + +static void damon_test_moving_sum(struct kunit *test) +{ + unsigned int mvsum = 50000, nomvsum = 50000, len_window = 10; + unsigned int new_values[] = {10000, 0, 10000, 0, 0, 0, 10000, 0, 0, 0}; + unsigned int expects[] = {55000, 50000, 55000, 50000, 45000, 40000, + 45000, 40000, 35000, 30000}; + int i; + + for (i = 0; i < ARRAY_SIZE(new_values); i++) { + mvsum = damon_moving_sum(mvsum, nomvsum, len_window, + new_values[i]); + KUNIT_EXPECT_EQ(test, mvsum, expects[i]); + } +} + +static void damos_test_new_filter(struct kunit *test) +{ + struct damos_filter *filter; + + filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true, false); + if (!filter) + kunit_skip(test, "filter alloc fail"); + KUNIT_EXPECT_EQ(test, filter->type, DAMOS_FILTER_TYPE_ANON); + KUNIT_EXPECT_EQ(test, filter->matching, true); + KUNIT_EXPECT_PTR_EQ(test, filter->list.prev, &filter->list); + KUNIT_EXPECT_PTR_EQ(test, filter->list.next, &filter->list); + damos_destroy_filter(filter); +} + +static void damos_test_commit_quota_goal_for(struct kunit *test, + struct damos_quota_goal *dst, + struct damos_quota_goal *src) +{ + u64 dst_last_psi_total = 0; + + if (dst->metric == DAMOS_QUOTA_SOME_MEM_PSI_US) + dst_last_psi_total = dst->last_psi_total; + damos_commit_quota_goal(dst, src); + + KUNIT_EXPECT_EQ(test, dst->metric, src->metric); + KUNIT_EXPECT_EQ(test, dst->target_value, src->target_value); + if (src->metric == DAMOS_QUOTA_USER_INPUT) + KUNIT_EXPECT_EQ(test, dst->current_value, src->current_value); + if (dst_last_psi_total && src->metric == DAMOS_QUOTA_SOME_MEM_PSI_US) + KUNIT_EXPECT_EQ(test, dst->last_psi_total, dst_last_psi_total); + switch (dst->metric) { + case DAMOS_QUOTA_NODE_MEM_USED_BP: + case DAMOS_QUOTA_NODE_MEM_FREE_BP: + KUNIT_EXPECT_EQ(test, dst->nid, src->nid); + break; + case DAMOS_QUOTA_NODE_MEMCG_USED_BP: + case DAMOS_QUOTA_NODE_MEMCG_FREE_BP: + KUNIT_EXPECT_EQ(test, dst->nid, src->nid); + KUNIT_EXPECT_EQ(test, dst->memcg_id, src->memcg_id); + break; + default: + break; + } +} + +static void damos_test_commit_quota_goal(struct kunit *test) +{ + struct damos_quota_goal dst = { + .metric = DAMOS_QUOTA_SOME_MEM_PSI_US, + .target_value = 1000, + .current_value = 123, + .last_psi_total = 456, + }; + + damos_test_commit_quota_goal_for(test, &dst, + &(struct damos_quota_goal){ + .metric = DAMOS_QUOTA_USER_INPUT, + .target_value = 789, + .current_value = 12}); + damos_test_commit_quota_goal_for(test, &dst, + &(struct damos_quota_goal){ + .metric = DAMOS_QUOTA_NODE_MEM_FREE_BP, + .target_value = 345, + .current_value = 678, + .nid = 9, + }); + damos_test_commit_quota_goal_for(test, &dst, + &(struct damos_quota_goal){ + .metric = DAMOS_QUOTA_NODE_MEM_USED_BP, + .target_value = 12, + .current_value = 345, + .nid = 6, + }); + damos_test_commit_quota_goal_for(test, &dst, + &(struct damos_quota_goal){ + .metric = DAMOS_QUOTA_NODE_MEMCG_USED_BP, + .target_value = 456, + .current_value = 567, + .nid = 6, + .memcg_id = 7, + }); + damos_test_commit_quota_goal_for(test, &dst, + &(struct damos_quota_goal){ + .metric = DAMOS_QUOTA_NODE_MEMCG_FREE_BP, + .target_value = 890, + .current_value = 901, + .nid = 10, + .memcg_id = 1, + }); + damos_test_commit_quota_goal_for(test, &dst, + &(struct damos_quota_goal) { + .metric = DAMOS_QUOTA_USER_INPUT, + .target_value = 789, + .current_value = 12, + }); +} + +static void damos_test_commit_quota_goals_for(struct kunit *test, + struct damos_quota_goal *dst_goals, int nr_dst_goals, + struct damos_quota_goal *src_goals, int nr_src_goals) +{ + struct damos_quota dst, src; + struct damos_quota_goal *goal, *next; + bool skip = true; + int i; + + INIT_LIST_HEAD(&dst.goals); + INIT_LIST_HEAD(&src.goals); + + for (i = 0; i < nr_dst_goals; i++) { + /* + * When nr_src_goals is smaller than dst_goals, + * damos_commit_quota_goals() will kfree() the dst goals. + * Make it kfree()-able. + */ + goal = damos_new_quota_goal(dst_goals[i].metric, + dst_goals[i].target_value); + if (!goal) + goto out; + damos_add_quota_goal(&dst, goal); + } + skip = false; + for (i = 0; i < nr_src_goals; i++) + damos_add_quota_goal(&src, &src_goals[i]); + + damos_commit_quota_goals(&dst, &src); + + i = 0; + damos_for_each_quota_goal(goal, (&dst)) { + KUNIT_EXPECT_EQ(test, goal->metric, src_goals[i].metric); + KUNIT_EXPECT_EQ(test, goal->target_value, + src_goals[i++].target_value); + } + KUNIT_EXPECT_EQ(test, i, nr_src_goals); + +out: + damos_for_each_quota_goal_safe(goal, next, (&dst)) + damos_destroy_quota_goal(goal); + if (skip) + kunit_skip(test, "goal alloc fail"); +} + +static void damos_test_commit_quota_goals(struct kunit *test) +{ + damos_test_commit_quota_goals_for(test, + (struct damos_quota_goal[]){}, 0, + (struct damos_quota_goal[]){ + { + .metric = DAMOS_QUOTA_USER_INPUT, + .target_value = 123, + }, + }, 1); + damos_test_commit_quota_goals_for(test, + (struct damos_quota_goal[]){ + { + .metric = DAMOS_QUOTA_USER_INPUT, + .target_value = 234, + }, + + }, 1, + (struct damos_quota_goal[]){ + { + .metric = DAMOS_QUOTA_USER_INPUT, + .target_value = 345, + }, + }, 1); + damos_test_commit_quota_goals_for(test, + (struct damos_quota_goal[]){ + { + .metric = DAMOS_QUOTA_USER_INPUT, + .target_value = 456, + }, + + }, 1, + (struct damos_quota_goal[]){}, 0); +} + +static void damos_test_commit_quota(struct kunit *test) +{ + struct damos_quota dst = { + .reset_interval = 1, + .ms = 2, + .sz = 3, + .weight_sz = 4, + .weight_nr_accesses = 5, + .weight_age = 6, + }; + struct damos_quota src = { + .reset_interval = 7, + .ms = 8, + .sz = 9, + .weight_sz = 10, + .weight_nr_accesses = 11, + .weight_age = 12, + }; + + INIT_LIST_HEAD(&dst.goals); + INIT_LIST_HEAD(&src.goals); + + damos_commit_quota(&dst, &src); + + KUNIT_EXPECT_EQ(test, dst.reset_interval, src.reset_interval); + KUNIT_EXPECT_EQ(test, dst.ms, src.ms); + KUNIT_EXPECT_EQ(test, dst.sz, src.sz); + KUNIT_EXPECT_EQ(test, dst.weight_sz, src.weight_sz); + KUNIT_EXPECT_EQ(test, dst.weight_nr_accesses, src.weight_nr_accesses); + KUNIT_EXPECT_EQ(test, dst.weight_age, src.weight_age); +} + +static int damos_test_help_dests_setup(struct damos_migrate_dests *dests, + unsigned int *node_id_arr, unsigned int *weight_arr, + size_t nr_dests) +{ + size_t i; + + dests->node_id_arr = kmalloc_array(nr_dests, + sizeof(*dests->node_id_arr), GFP_KERNEL); + if (!dests->node_id_arr) + return -ENOMEM; + dests->weight_arr = kmalloc_array(nr_dests, + sizeof(*dests->weight_arr), GFP_KERNEL); + if (!dests->weight_arr) { + kfree(dests->node_id_arr); + dests->node_id_arr = NULL; + return -ENOMEM; + } + + for (i = 0; i < nr_dests; i++) { + dests->node_id_arr[i] = node_id_arr[i]; + dests->weight_arr[i] = weight_arr[i]; + } + dests->nr_dests = nr_dests; + return 0; +} + +static void damos_test_help_dests_free(struct damos_migrate_dests *dests) +{ + kfree(dests->node_id_arr); + kfree(dests->weight_arr); +} + +static void damos_test_commit_dests_for(struct kunit *test, + unsigned int *dst_node_id_arr, unsigned int *dst_weight_arr, + size_t dst_nr_dests, + unsigned int *src_node_id_arr, unsigned int *src_weight_arr, + size_t src_nr_dests) +{ + struct damos_migrate_dests dst = {}, src = {}; + int i, err; + bool skip = true; + + err = damos_test_help_dests_setup(&dst, dst_node_id_arr, + dst_weight_arr, dst_nr_dests); + if (err) + kunit_skip(test, "dests setup fail"); + err = damos_test_help_dests_setup(&src, src_node_id_arr, + src_weight_arr, src_nr_dests); + if (err) { + damos_test_help_dests_free(&dst); + kunit_skip(test, "src setup fail"); + } + err = damos_commit_dests(&dst, &src); + if (err) + goto out; + skip = false; + + KUNIT_EXPECT_EQ(test, dst.nr_dests, src_nr_dests); + for (i = 0; i < dst.nr_dests; i++) { + KUNIT_EXPECT_EQ(test, dst.node_id_arr[i], src_node_id_arr[i]); + KUNIT_EXPECT_EQ(test, dst.weight_arr[i], src_weight_arr[i]); + } + +out: + damos_test_help_dests_free(&dst); + damos_test_help_dests_free(&src); + if (skip) + kunit_skip(test, "skip"); +} + +static void damos_test_commit_dests(struct kunit *test) +{ + damos_test_commit_dests_for(test, + (unsigned int[]){1, 2, 3}, (unsigned int[]){2, 3, 4}, + 3, + (unsigned int[]){4, 5, 6}, (unsigned int[]){5, 6, 7}, + 3); + damos_test_commit_dests_for(test, + (unsigned int[]){1, 2}, (unsigned int[]){2, 3}, + 2, + (unsigned int[]){4, 5, 6}, (unsigned int[]){5, 6, 7}, + 3); + damos_test_commit_dests_for(test, + NULL, NULL, 0, + (unsigned int[]){4, 5, 6}, (unsigned int[]){5, 6, 7}, + 3); + damos_test_commit_dests_for(test, + (unsigned int[]){1, 2, 3}, (unsigned int[]){2, 3, 4}, + 3, + (unsigned int[]){4, 5}, (unsigned int[]){5, 6}, 2); + damos_test_commit_dests_for(test, + (unsigned int[]){1, 2, 3}, (unsigned int[]){2, 3, 4}, + 3, + NULL, NULL, 0); +} + +static void damos_test_commit_filter_for(struct kunit *test, + struct damos_filter *dst, struct damos_filter *src) +{ + damos_commit_filter(dst, src); + KUNIT_EXPECT_EQ(test, dst->type, src->type); + KUNIT_EXPECT_EQ(test, dst->matching, src->matching); + KUNIT_EXPECT_EQ(test, dst->allow, src->allow); + switch (src->type) { + case DAMOS_FILTER_TYPE_MEMCG: + KUNIT_EXPECT_EQ(test, dst->memcg_id, src->memcg_id); + break; + case DAMOS_FILTER_TYPE_ADDR: + KUNIT_EXPECT_EQ(test, dst->addr_range.start, + src->addr_range.start); + KUNIT_EXPECT_EQ(test, dst->addr_range.end, + src->addr_range.end); + break; + case DAMOS_FILTER_TYPE_TARGET: + KUNIT_EXPECT_EQ(test, dst->target_idx, src->target_idx); + break; + case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: + KUNIT_EXPECT_EQ(test, dst->sz_range.min, src->sz_range.min); + KUNIT_EXPECT_EQ(test, dst->sz_range.max, src->sz_range.max); + break; + default: + break; + } +} + +static void damos_test_commit_filter(struct kunit *test) +{ + struct damos_filter dst = { + .type = DAMOS_FILTER_TYPE_ACTIVE, + .matching = false, + .allow = false, + }; + + damos_test_commit_filter_for(test, &dst, + &(struct damos_filter){ + .type = DAMOS_FILTER_TYPE_ANON, + .matching = true, + .allow = true, + }); + damos_test_commit_filter_for(test, &dst, + &(struct damos_filter){ + .type = DAMOS_FILTER_TYPE_MEMCG, + .matching = false, + .allow = false, + .memcg_id = 123, + }); + damos_test_commit_filter_for(test, &dst, + &(struct damos_filter){ + .type = DAMOS_FILTER_TYPE_YOUNG, + .matching = true, + .allow = true, + }); + damos_test_commit_filter_for(test, &dst, + &(struct damos_filter){ + .type = DAMOS_FILTER_TYPE_HUGEPAGE_SIZE, + .matching = false, + .allow = false, + .sz_range = {.min = 234, .max = 345}, + }); + damos_test_commit_filter_for(test, &dst, + &(struct damos_filter){ + .type = DAMOS_FILTER_TYPE_UNMAPPED, + .matching = true, + .allow = true, + }); + damos_test_commit_filter_for(test, &dst, + &(struct damos_filter){ + .type = DAMOS_FILTER_TYPE_ADDR, + .matching = false, + .allow = false, + .addr_range = {.start = 456, .end = 567}, + }); + damos_test_commit_filter_for(test, &dst, + &(struct damos_filter){ + .type = DAMOS_FILTER_TYPE_TARGET, + .matching = true, + .allow = true, + .target_idx = 6, + }); +} + +static void damos_test_help_initailize_scheme(struct damos *scheme) +{ + INIT_LIST_HEAD(&scheme->quota.goals); + INIT_LIST_HEAD(&scheme->core_filters); + INIT_LIST_HEAD(&scheme->ops_filters); +} + +static void damos_test_commit_for(struct kunit *test, struct damos *dst, + struct damos *src) +{ + int err; + + damos_test_help_initailize_scheme(dst); + damos_test_help_initailize_scheme(src); + + err = damos_commit(dst, src); + if (err) + kunit_skip(test, "damos_commit fail"); + + KUNIT_EXPECT_EQ(test, dst->pattern.min_sz_region, + src->pattern.min_sz_region); + KUNIT_EXPECT_EQ(test, dst->pattern.max_sz_region, + src->pattern.max_sz_region); + KUNIT_EXPECT_EQ(test, dst->pattern.min_nr_accesses, + src->pattern.min_nr_accesses); + KUNIT_EXPECT_EQ(test, dst->pattern.max_nr_accesses, + src->pattern.max_nr_accesses); + KUNIT_EXPECT_EQ(test, dst->pattern.min_age_region, + src->pattern.min_age_region); + KUNIT_EXPECT_EQ(test, dst->pattern.max_age_region, + src->pattern.max_age_region); + + KUNIT_EXPECT_EQ(test, dst->action, src->action); + KUNIT_EXPECT_EQ(test, dst->apply_interval_us, src->apply_interval_us); + + KUNIT_EXPECT_EQ(test, dst->wmarks.metric, src->wmarks.metric); + KUNIT_EXPECT_EQ(test, dst->wmarks.interval, src->wmarks.interval); + KUNIT_EXPECT_EQ(test, dst->wmarks.high, src->wmarks.high); + KUNIT_EXPECT_EQ(test, dst->wmarks.mid, src->wmarks.mid); + KUNIT_EXPECT_EQ(test, dst->wmarks.low, src->wmarks.low); + + switch (src->action) { + case DAMOS_MIGRATE_COLD: + case DAMOS_MIGRATE_HOT: + KUNIT_EXPECT_EQ(test, dst->target_nid, src->target_nid); + break; + default: + break; + } +} + +static void damos_test_commit(struct kunit *test) +{ + damos_test_commit_for(test, + &(struct damos){ + .pattern = (struct damos_access_pattern){ + 1, 2, 3, 4, 5, 6}, + .action = DAMOS_PAGEOUT, + .apply_interval_us = 1000000, + .wmarks = (struct damos_watermarks){ + DAMOS_WMARK_FREE_MEM_RATE, + 900, 100, 50}, + }, + &(struct damos){ + .pattern = (struct damos_access_pattern){ + 2, 3, 4, 5, 6, 7}, + .action = DAMOS_PAGEOUT, + .apply_interval_us = 2000000, + .wmarks = (struct damos_watermarks){ + DAMOS_WMARK_FREE_MEM_RATE, + 800, 50, 30}, + }); + damos_test_commit_for(test, + &(struct damos){ + .pattern = (struct damos_access_pattern){ + 1, 2, 3, 4, 5, 6}, + .action = DAMOS_PAGEOUT, + .apply_interval_us = 1000000, + .wmarks = (struct damos_watermarks){ + DAMOS_WMARK_FREE_MEM_RATE, + 900, 100, 50}, + }, + &(struct damos){ + .pattern = (struct damos_access_pattern){ + 2, 3, 4, 5, 6, 7}, + .action = DAMOS_MIGRATE_HOT, + .apply_interval_us = 2000000, + .target_nid = 5, + }); +} + +static struct damon_target *damon_test_help_setup_target( + unsigned long region_start_end[][2], int nr_regions) +{ + struct damon_target *t; + struct damon_region *r; + int i; + + t = damon_new_target(); + if (!t) + return NULL; + for (i = 0; i < nr_regions; i++) { + r = damon_new_region(region_start_end[i][0], + region_start_end[i][1]); + if (!r) { + damon_free_target(t); + return NULL; + } + damon_add_region(r, t); + } + return t; +} + +static void damon_test_commit_target_regions_for(struct kunit *test, + unsigned long dst_start_end[][2], int nr_dst_regions, + unsigned long src_start_end[][2], int nr_src_regions, + unsigned long expect_start_end[][2], int nr_expect_regions) +{ + struct damon_target *dst_target, *src_target; + struct damon_region *r; + int i; + + dst_target = damon_test_help_setup_target(dst_start_end, nr_dst_regions); + if (!dst_target) + kunit_skip(test, "dst target setup fail"); + src_target = damon_test_help_setup_target(src_start_end, nr_src_regions); + if (!src_target) { + damon_free_target(dst_target); + kunit_skip(test, "src target setup fail"); + } + damon_commit_target_regions(dst_target, src_target, 1); + i = 0; + damon_for_each_region(r, dst_target) { + KUNIT_EXPECT_EQ(test, r->ar.start, expect_start_end[i][0]); + KUNIT_EXPECT_EQ(test, r->ar.end, expect_start_end[i][1]); + i++; + } + KUNIT_EXPECT_EQ(test, damon_nr_regions(dst_target), nr_expect_regions); + KUNIT_EXPECT_EQ(test, i, nr_expect_regions); + damon_free_target(dst_target); + damon_free_target(src_target); +} + +static void damon_test_commit_target_regions(struct kunit *test) +{ + damon_test_commit_target_regions_for(test, + (unsigned long[][2]) {{3, 8}, {8, 10}}, 2, + (unsigned long[][2]) {{4, 6}}, 1, + (unsigned long[][2]) {{4, 6}}, 1); + damon_test_commit_target_regions_for(test, + (unsigned long[][2]) {{3, 8}, {8, 10}}, 2, + (unsigned long[][2]) {}, 0, + (unsigned long[][2]) {{3, 8}, {8, 10}}, 2); +} + +static void damos_test_filter_out(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r, *r2; + struct damos_filter *f; + + f = damos_new_filter(DAMOS_FILTER_TYPE_ADDR, true, false); + if (!f) + kunit_skip(test, "filter alloc fail"); + f->addr_range = (struct damon_addr_range){.start = 2, .end = 6}; + + t = damon_new_target(); + if (!t) { + damos_destroy_filter(f); + kunit_skip(test, "target alloc fail"); + } + r = damon_new_region(3, 5); + if (!r) { + damos_destroy_filter(f); + damon_free_target(t); + kunit_skip(test, "region alloc fail"); + } + damon_add_region(r, t); + + /* region in the range */ + KUNIT_EXPECT_TRUE(test, damos_filter_match(NULL, t, r, f, 1)); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1); + + /* region before the range */ + r->ar.start = 1; + r->ar.end = 2; + KUNIT_EXPECT_FALSE(test, + damos_filter_match(NULL, t, r, f, 1)); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1); + + /* region after the range */ + r->ar.start = 6; + r->ar.end = 8; + KUNIT_EXPECT_FALSE(test, + damos_filter_match(NULL, t, r, f, 1)); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1); + + /* region started before the range */ + r->ar.start = 1; + r->ar.end = 4; + KUNIT_EXPECT_FALSE(test, damos_filter_match(NULL, t, r, f, 1)); + /* filter should have split the region */ + KUNIT_EXPECT_EQ(test, r->ar.start, 1); + KUNIT_EXPECT_EQ(test, r->ar.end, 2); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2); + r2 = damon_next_region(r); + KUNIT_EXPECT_EQ(test, r2->ar.start, 2); + KUNIT_EXPECT_EQ(test, r2->ar.end, 4); + damon_destroy_region(r2, t); + + /* region started in the range */ + r->ar.start = 2; + r->ar.end = 8; + KUNIT_EXPECT_TRUE(test, + damos_filter_match(NULL, t, r, f, 1)); + /* filter should have split the region */ + KUNIT_EXPECT_EQ(test, r->ar.start, 2); + KUNIT_EXPECT_EQ(test, r->ar.end, 6); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2); + r2 = damon_next_region(r); + KUNIT_EXPECT_EQ(test, r2->ar.start, 6); + KUNIT_EXPECT_EQ(test, r2->ar.end, 8); + damon_destroy_region(r2, t); + + damon_free_target(t); + damos_free_filter(f); +} + +static void damon_test_feed_loop_next_input(struct kunit *test) +{ + unsigned long last_input = 900000, current_score = 200; + + /* + * If current score is lower than the goal, which is always 10,000 + * (read the comment on damon_feed_loop_next_input()'s comment), next + * input should be higher than the last input. + */ + KUNIT_EXPECT_GT(test, + damon_feed_loop_next_input(last_input, current_score), + last_input); + + /* + * If current score is higher than the goal, next input should be lower + * than the last input. + */ + current_score = 250000000; + KUNIT_EXPECT_LT(test, + damon_feed_loop_next_input(last_input, current_score), + last_input); + + /* + * The next input depends on the distance between the current score and + * the goal + */ + KUNIT_EXPECT_GT(test, + damon_feed_loop_next_input(last_input, 200), + damon_feed_loop_next_input(last_input, 2000)); +} + +static void damon_test_set_filters_default_reject(struct kunit *test) +{ + struct damos scheme; + struct damos_filter *target_filter, *anon_filter; + + INIT_LIST_HEAD(&scheme.core_filters); + INIT_LIST_HEAD(&scheme.ops_filters); + + damos_set_filters_default_reject(&scheme); + /* + * No filter is installed. Allow by default on both core and ops layer + * filtering stages, since there are no filters at all. + */ + KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, false); + KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, false); + + target_filter = damos_new_filter(DAMOS_FILTER_TYPE_TARGET, true, true); + if (!target_filter) + kunit_skip(test, "filter alloc fail"); + damos_add_filter(&scheme, target_filter); + damos_set_filters_default_reject(&scheme); + /* + * A core-handled allow-filter is installed. + * Rejct by default on core layer filtering stage due to the last + * core-layer-filter's behavior. + * Allow by default on ops layer filtering stage due to the absence of + * ops layer filters. + */ + KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, true); + KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, false); + + target_filter->allow = false; + damos_set_filters_default_reject(&scheme); + /* + * A core-handled reject-filter is installed. + * Allow by default on core layer filtering stage due to the last + * core-layer-filter's behavior. + * Allow by default on ops layer filtering stage due to the absence of + * ops layer filters. + */ + KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, false); + KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, false); + + anon_filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true, true); + if (!anon_filter) { + damos_free_filter(target_filter); + kunit_skip(test, "anon_filter alloc fail"); + } + damos_add_filter(&scheme, anon_filter); + + damos_set_filters_default_reject(&scheme); + /* + * A core-handled reject-filter and ops-handled allow-filter are installed. + * Allow by default on core layer filtering stage due to the existence + * of the ops-handled filter. + * Reject by default on ops layer filtering stage due to the last + * ops-layer-filter's behavior. + */ + KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, false); + KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, true); + + target_filter->allow = true; + damos_set_filters_default_reject(&scheme); + /* + * A core-handled allow-filter and ops-handled allow-filter are + * installed. + * Allow by default on core layer filtering stage due to the existence + * of the ops-handled filter. + * Reject by default on ops layer filtering stage due to the last + * ops-layer-filter's behavior. + */ + KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, false); + KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, true); + + damos_free_filter(anon_filter); + damos_free_filter(target_filter); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_test_target), + KUNIT_CASE(damon_test_regions), + KUNIT_CASE(damon_test_aggregate), + KUNIT_CASE(damon_test_split_at), + KUNIT_CASE(damon_test_merge_two), + KUNIT_CASE(damon_test_merge_regions_of), + KUNIT_CASE(damon_test_split_regions_of), + KUNIT_CASE(damon_test_ops_registration), + KUNIT_CASE(damon_test_set_regions), + KUNIT_CASE(damon_test_nr_accesses_to_accesses_bp), + KUNIT_CASE(damon_test_update_monitoring_result), + KUNIT_CASE(damon_test_set_attrs), + KUNIT_CASE(damon_test_moving_sum), + KUNIT_CASE(damos_test_new_filter), + KUNIT_CASE(damos_test_commit_quota_goal), + KUNIT_CASE(damos_test_commit_quota_goals), + KUNIT_CASE(damos_test_commit_quota), + KUNIT_CASE(damos_test_commit_dests), + KUNIT_CASE(damos_test_commit_filter), + KUNIT_CASE(damos_test_commit), + KUNIT_CASE(damon_test_commit_target_regions), + KUNIT_CASE(damos_test_filter_out), + KUNIT_CASE(damon_test_feed_loop_next_input), + KUNIT_CASE(damon_test_set_filters_default_reject), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_CORE_TEST_H */ + +#endif /* CONFIG_DAMON_KUNIT_TEST */ diff --git a/mm/damon/tests/sysfs-kunit.h b/mm/damon/tests/sysfs-kunit.h new file mode 100644 index 000000000000..0c665ed255a3 --- /dev/null +++ b/mm/damon/tests/sysfs-kunit.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#ifdef CONFIG_DAMON_SYSFS_KUNIT_TEST + +#ifndef _DAMON_SYSFS_TEST_H +#define _DAMON_SYSFS_TEST_H + +#include <kunit/test.h> + +static unsigned int nr_damon_targets(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_targets = 0; + + damon_for_each_target(t, ctx) + nr_targets++; + + return nr_targets; +} + +static int __damon_sysfs_test_get_any_pid(int min, int max) +{ + struct pid *pid; + int i; + + for (i = min; i <= max; i++) { + pid = find_get_pid(i); + if (pid) { + put_pid(pid); + return i; + } + } + return -1; +} + +static void damon_sysfs_test_add_targets(struct kunit *test) +{ + struct damon_sysfs_targets *sysfs_targets; + struct damon_sysfs_target *sysfs_target; + struct damon_ctx *ctx; + + sysfs_targets = damon_sysfs_targets_alloc(); + if (!sysfs_targets) + kunit_skip(test, "sysfs_targets alloc fail"); + sysfs_targets->nr = 1; + sysfs_targets->targets_arr = kmalloc_array(1, + sizeof(*sysfs_targets->targets_arr), GFP_KERNEL); + if (!sysfs_targets->targets_arr) { + kfree(sysfs_targets); + kunit_skip(test, "targets_arr alloc fail"); + } + + sysfs_target = damon_sysfs_target_alloc(); + if (!sysfs_target) { + kfree(sysfs_targets->targets_arr); + kfree(sysfs_targets); + kunit_skip(test, "sysfs_target alloc fail"); + } + sysfs_target->pid = __damon_sysfs_test_get_any_pid(12, 100); + sysfs_target->regions = damon_sysfs_regions_alloc(); + if (!sysfs_target->regions) { + kfree(sysfs_targets->targets_arr); + kfree(sysfs_targets); + kfree(sysfs_target); + kunit_skip(test, "sysfs_regions alloc fail"); + } + + sysfs_targets->targets_arr[0] = sysfs_target; + + ctx = damon_new_ctx(); + if (!ctx) { + kfree(sysfs_targets->targets_arr); + kfree(sysfs_targets); + kfree(sysfs_target->regions); + kfree(sysfs_target); + kunit_skip(test, "ctx alloc fail"); + } + + damon_sysfs_add_targets(ctx, sysfs_targets); + KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(ctx)); + + sysfs_target->pid = __damon_sysfs_test_get_any_pid( + sysfs_target->pid + 1, 200); + damon_sysfs_add_targets(ctx, sysfs_targets); + KUNIT_EXPECT_EQ(test, 2u, nr_damon_targets(ctx)); + + damon_destroy_ctx(ctx); + kfree(sysfs_targets->targets_arr); + kfree(sysfs_targets); + kfree(sysfs_target->regions); + kfree(sysfs_target); +} + +static struct kunit_case damon_sysfs_test_cases[] = { + KUNIT_CASE(damon_sysfs_test_add_targets), + {}, +}; + +static struct kunit_suite damon_sysfs_test_suite = { + .name = "damon-sysfs", + .test_cases = damon_sysfs_test_cases, +}; +kunit_test_suite(damon_sysfs_test_suite); + +#endif /* _DAMON_SYSFS_TEST_H */ + +#endif /* CONFIG_DAMON_SYSFS_KUNIT_TEST */ diff --git a/mm/damon/tests/vaddr-kunit.h b/mm/damon/tests/vaddr-kunit.h new file mode 100644 index 000000000000..30dc5459f1d2 --- /dev/null +++ b/mm/damon/tests/vaddr-kunit.h @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST + +#ifndef _DAMON_VADDR_TEST_H +#define _DAMON_VADDR_TEST_H + +#include <kunit/test.h> + +static int __link_vmas(struct maple_tree *mt, struct vm_area_struct *vmas, + ssize_t nr_vmas) +{ + int i, ret = -ENOMEM; + MA_STATE(mas, mt, 0, 0); + + if (!nr_vmas) + return 0; + + mas_lock(&mas); + for (i = 0; i < nr_vmas; i++) { + mas_set_range(&mas, vmas[i].vm_start, vmas[i].vm_end - 1); + if (mas_store_gfp(&mas, &vmas[i], GFP_KERNEL)) + goto failed; + } + + ret = 0; +failed: + mas_unlock(&mas); + return ret; +} + +/* + * Test __damon_va_three_regions() function + * + * In case of virtual memory address spaces monitoring, DAMON converts the + * complex and dynamic memory mappings of each target task to three + * discontiguous regions which cover every mapped areas. However, the three + * regions should not include the two biggest unmapped areas in the original + * mapping, because the two biggest areas are normally the areas between 1) + * heap and the mmap()-ed regions, and 2) the mmap()-ed regions and stack. + * Because these two unmapped areas are very huge but obviously never accessed, + * covering the region is just a waste. + * + * '__damon_va_three_regions() receives an address space of a process. It + * first identifies the start of mappings, end of mappings, and the two biggest + * unmapped areas. After that, based on the information, it constructs the + * three regions and returns. For more detail, refer to the comment of + * 'damon_init_regions_of()' function definition in 'mm/damon.c' file. + * + * For example, suppose virtual address ranges of 10-20, 20-25, 200-210, + * 210-220, 300-305, and 307-330 (Other comments represent this mappings in + * more short form: 10-20-25, 200-210-220, 300-305, 307-330) of a process are + * mapped. To cover every mappings, the three regions should start with 10, + * and end with 305. The process also has three unmapped areas, 25-200, + * 220-300, and 305-307. Among those, 25-200 and 220-300 are the biggest two + * unmapped areas, and thus it should be converted to three regions of 10-25, + * 200-220, and 300-330. + */ +static void damon_test_three_regions_in_vmas(struct kunit *test) +{ + static struct mm_struct mm; + struct damon_addr_range regions[3] = {0}; + /* 10-20-25, 200-210-220, 300-305, 307-330 */ + static struct vm_area_struct vmas[] = { + (struct vm_area_struct) {.vm_start = 10, .vm_end = 20}, + (struct vm_area_struct) {.vm_start = 20, .vm_end = 25}, + (struct vm_area_struct) {.vm_start = 200, .vm_end = 210}, + (struct vm_area_struct) {.vm_start = 210, .vm_end = 220}, + (struct vm_area_struct) {.vm_start = 300, .vm_end = 305}, + (struct vm_area_struct) {.vm_start = 307, .vm_end = 330}, + }; + + mt_init_flags(&mm.mm_mt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_USE_RCU); + if (__link_vmas(&mm.mm_mt, vmas, ARRAY_SIZE(vmas))) + kunit_skip(test, "Failed to create VMA tree"); + + __damon_va_three_regions(&mm, regions); + + KUNIT_EXPECT_EQ(test, 10ul, regions[0].start); + KUNIT_EXPECT_EQ(test, 25ul, regions[0].end); + KUNIT_EXPECT_EQ(test, 200ul, regions[1].start); + KUNIT_EXPECT_EQ(test, 220ul, regions[1].end); + KUNIT_EXPECT_EQ(test, 300ul, regions[2].start); + KUNIT_EXPECT_EQ(test, 330ul, regions[2].end); +} + +static struct damon_region *__nth_region_of(struct damon_target *t, int idx) +{ + struct damon_region *r; + unsigned int i = 0; + + damon_for_each_region(r, t) { + if (i++ == idx) + return r; + } + + return NULL; +} + +/* + * Test 'damon_set_regions()' + * + * test kunit object + * regions an array containing start/end addresses of current + * monitoring target regions + * nr_regions the number of the addresses in 'regions' + * three_regions The three regions that need to be applied now + * expected start/end addresses of monitoring target regions that + * 'three_regions' are applied + * nr_expected the number of addresses in 'expected' + * + * The memory mapping of the target processes changes dynamically. To follow + * the change, DAMON periodically reads the mappings, simplifies it to the + * three regions, and updates the monitoring target regions to fit in the three + * regions. The update of current target regions is the role of + * 'damon_set_regions()'. + * + * This test passes the given target regions and the new three regions that + * need to be applied to the function and check whether it updates the regions + * as expected. + */ +static void damon_do_test_apply_three_regions(struct kunit *test, + unsigned long *regions, int nr_regions, + struct damon_addr_range *three_regions, + unsigned long *expected, int nr_expected) +{ + struct damon_target *t; + struct damon_region *r; + int i; + + t = damon_new_target(); + if (!t) + kunit_skip(test, "target alloc fail"); + for (i = 0; i < nr_regions / 2; i++) { + r = damon_new_region(regions[i * 2], regions[i * 2 + 1]); + if (!r) { + damon_destroy_target(t, NULL); + kunit_skip(test, "region alloc fail"); + } + damon_add_region(r, t); + } + + damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION); + + for (i = 0; i < nr_expected / 2; i++) { + r = __nth_region_of(t, i); + KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]); + KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]); + } + + damon_destroy_target(t, NULL); +} + +/* + * This function test most common case where the three big regions are only + * slightly changed. Target regions should adjust their boundary (10-20-30, + * 50-55, 70-80, 90-100) to fit with the new big regions or remove target + * regions (57-79) that now out of the three regions. + */ +static void damon_test_apply_three_regions1(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 45-55, 73-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 45, .end = 55}, + (struct damon_addr_range){.start = 73, .end = 104} }; + /* 5-20-27, 45-55, 73-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 45, 55, + 73, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test slightly bigger change. Similar to above, but the second big region + * now require two target regions (50-55, 57-59) to be removed. + */ +static void damon_test_apply_three_regions2(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 56-57, 65-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 56, .end = 57}, + (struct damon_addr_range){.start = 65, .end = 104} }; + /* 5-20-27, 56-57, 65-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 56, 57, + 65, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test a big change. The second big region has totally freed and mapped to + * different area (50-59 -> 61-63). The target regions which were in the old + * second big region (50-55-57-59) should be removed and new target region + * covering the second big region (61-63) should be created. + */ +static void damon_test_apply_three_regions3(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 61-63, 65-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 61, .end = 63}, + (struct damon_addr_range){.start = 65, .end = 104} }; + /* 5-20-27, 61-63, 65-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 61, 63, + 65, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test another big change. Both of the second and third big regions (50-59 + * and 70-100) has totally freed and mapped to different area (30-32 and + * 65-68). The target regions which were in the old second and third big + * regions should now be removed and new target regions covering the new second + * and third big regions should be created. + */ +static void damon_test_apply_three_regions4(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-7, 30-32, 65-68 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 7}, + (struct damon_addr_range){.start = 30, .end = 32}, + (struct damon_addr_range){.start = 65, .end = 68} }; + /* expect 5-7, 30-32, 65-68 */ + unsigned long expected[] = {5, 7, 30, 32, 65, 68}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +static void damon_test_split_evenly_fail(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(); + struct damon_region *r; + + if (!t) + kunit_skip(test, "target alloc fail"); + + r = damon_new_region(start, end); + if (!r) { + damon_free_target(t); + kunit_skip(test, "region alloc fail"); + } + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); + + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, r->ar.start, start); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + } + + damon_free_target(t); +} + +static void damon_test_split_evenly_succ(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(); + struct damon_region *r; + unsigned long expected_width = (end - start) / nr_pieces; + unsigned long i = 0; + + if (!t) + kunit_skip(test, "target alloc fail"); + r = damon_new_region(start, end); + if (!r) { + damon_free_target(t); + kunit_skip(test, "region alloc fail"); + } + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), 0); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces); + + damon_for_each_region(r, t) { + if (i == nr_pieces - 1) { + KUNIT_EXPECT_EQ(test, + r->ar.start, start + i * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + break; + } + KUNIT_EXPECT_EQ(test, + r->ar.start, start + i++ * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width); + } + damon_free_target(t); +} + +static void damon_test_split_evenly(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), + -EINVAL); + + damon_test_split_evenly_fail(test, 0, 100, 0); + damon_test_split_evenly_succ(test, 0, 100, 10); + damon_test_split_evenly_succ(test, 5, 59, 5); + damon_test_split_evenly_succ(test, 4, 6, 1); + damon_test_split_evenly_succ(test, 0, 3, 2); + damon_test_split_evenly_fail(test, 5, 6, 2); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_test_three_regions_in_vmas), + KUNIT_CASE(damon_test_apply_three_regions1), + KUNIT_CASE(damon_test_apply_three_regions2), + KUNIT_CASE(damon_test_apply_three_regions3), + KUNIT_CASE(damon_test_apply_three_regions4), + KUNIT_CASE(damon_test_split_evenly), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon-operations", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_VADDR_TEST_H */ + +#endif /* CONFIG_DAMON_VADDR_KUNIT_TEST */ diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c new file mode 100644 index 000000000000..2750c88e7225 --- /dev/null +++ b/mm/damon/vaddr.c @@ -0,0 +1,1038 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Code for Virtual Address Spaces + * + * Author: SeongJae Park <sj@kernel.org> + */ + +#define pr_fmt(fmt) "damon-va: " fmt + +#include <linux/highmem.h> +#include <linux/hugetlb.h> +#include <linux/mman.h> +#include <linux/mmu_notifier.h> +#include <linux/page_idle.h> +#include <linux/pagewalk.h> +#include <linux/sched/mm.h> + +#include "../internal.h" +#include "ops-common.h" + +#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST +#undef DAMON_MIN_REGION +#define DAMON_MIN_REGION 1 +#endif + +/* + * 't->pid' should be the pointer to the relevant 'struct pid' having reference + * count. Caller must put the returned task, unless it is NULL. + */ +static inline struct task_struct *damon_get_task_struct(struct damon_target *t) +{ + return get_pid_task(t->pid, PIDTYPE_PID); +} + +/* + * Get the mm_struct of the given target + * + * Caller _must_ put the mm_struct after use, unless it is NULL. + * + * Returns the mm_struct of the target on success, NULL on failure + */ +static struct mm_struct *damon_get_mm(struct damon_target *t) +{ + struct task_struct *task; + struct mm_struct *mm; + + task = damon_get_task_struct(t); + if (!task) + return NULL; + + mm = get_task_mm(task); + put_task_struct(task); + return mm; +} + +/* + * Functions for the initial monitoring target regions construction + */ + +/* + * Size-evenly split a region into 'nr_pieces' small regions + * + * Returns 0 on success, or negative error code otherwise. + */ +static int damon_va_evenly_split_region(struct damon_target *t, + struct damon_region *r, unsigned int nr_pieces) +{ + unsigned long sz_orig, sz_piece, orig_end; + struct damon_region *n = NULL, *next; + unsigned long start; + unsigned int i; + + if (!r || !nr_pieces) + return -EINVAL; + + if (nr_pieces == 1) + return 0; + + orig_end = r->ar.end; + sz_orig = damon_sz_region(r); + sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); + + if (!sz_piece) + return -EINVAL; + + r->ar.end = r->ar.start + sz_piece; + next = damon_next_region(r); + for (start = r->ar.end, i = 1; i < nr_pieces; start += sz_piece, i++) { + n = damon_new_region(start, start + sz_piece); + if (!n) + return -ENOMEM; + damon_insert_region(n, r, next, t); + r = n; + } + /* complement last region for possible rounding error */ + if (n) + n->ar.end = orig_end; + + return 0; +} + +static unsigned long sz_range(struct damon_addr_range *r) +{ + return r->end - r->start; +} + +/* + * Find three regions separated by two biggest unmapped regions + * + * vma the head vma of the target address space + * regions an array of three address ranges that results will be saved + * + * This function receives an address space and finds three regions in it which + * separated by the two biggest unmapped regions in the space. Please refer to + * below comments of '__damon_va_init_regions()' function to know why this is + * necessary. + * + * Returns 0 if success, or negative error code otherwise. + */ +static int __damon_va_three_regions(struct mm_struct *mm, + struct damon_addr_range regions[3]) +{ + struct damon_addr_range first_gap = {0}, second_gap = {0}; + VMA_ITERATOR(vmi, mm, 0); + struct vm_area_struct *vma, *prev = NULL; + unsigned long start; + + /* + * Find the two biggest gaps so that first_gap > second_gap > others. + * If this is too slow, it can be optimised to examine the maple + * tree gaps. + */ + rcu_read_lock(); + for_each_vma(vmi, vma) { + unsigned long gap; + + if (!prev) { + start = vma->vm_start; + goto next; + } + gap = vma->vm_start - prev->vm_end; + + if (gap > sz_range(&first_gap)) { + second_gap = first_gap; + first_gap.start = prev->vm_end; + first_gap.end = vma->vm_start; + } else if (gap > sz_range(&second_gap)) { + second_gap.start = prev->vm_end; + second_gap.end = vma->vm_start; + } +next: + prev = vma; + } + rcu_read_unlock(); + + if (!sz_range(&second_gap) || !sz_range(&first_gap)) + return -EINVAL; + + /* Sort the two biggest gaps by address */ + if (first_gap.start > second_gap.start) + swap(first_gap, second_gap); + + /* Store the result */ + regions[0].start = ALIGN(start, DAMON_MIN_REGION); + regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION); + regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); + regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); + regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); + regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION); + + return 0; +} + +/* + * Get the three regions in the given target (task) + * + * Returns 0 on success, negative error code otherwise. + */ +static int damon_va_three_regions(struct damon_target *t, + struct damon_addr_range regions[3]) +{ + struct mm_struct *mm; + int rc; + + mm = damon_get_mm(t); + if (!mm) + return -EINVAL; + + mmap_read_lock(mm); + rc = __damon_va_three_regions(mm, regions); + mmap_read_unlock(mm); + + mmput(mm); + return rc; +} + +/* + * Initialize the monitoring target regions for the given target (task) + * + * t the given target + * + * Because only a number of small portions of the entire address space + * is actually mapped to the memory and accessed, monitoring the unmapped + * regions is wasteful. That said, because we can deal with small noises, + * tracking every mapping is not strictly required but could even incur a high + * overhead if the mapping frequently changes or the number of mappings is + * high. The adaptive regions adjustment mechanism will further help to deal + * with the noise by simply identifying the unmapped areas as a region that + * has no access. Moreover, applying the real mappings that would have many + * unmapped areas inside will make the adaptive mechanism quite complex. That + * said, too huge unmapped areas inside the monitoring target should be removed + * to not take the time for the adaptive mechanism. + * + * For the reason, we convert the complex mappings to three distinct regions + * that cover every mapped area of the address space. Also the two gaps + * between the three regions are the two biggest unmapped areas in the given + * address space. In detail, this function first identifies the start and the + * end of the mappings and the two biggest unmapped areas of the address space. + * Then, it constructs the three regions as below: + * + * [mappings[0]->start, big_two_unmapped_areas[0]->start) + * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) + * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) + * + * As usual memory map of processes is as below, the gap between the heap and + * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed + * region and the stack will be two biggest unmapped regions. Because these + * gaps are exceptionally huge areas in usual address space, excluding these + * two biggest unmapped regions will be sufficient to make a trade-off. + * + * <heap> + * <BIG UNMAPPED REGION 1> + * <uppermost mmap()-ed region> + * (other mmap()-ed regions and small unmapped regions) + * <lowermost mmap()-ed region> + * <BIG UNMAPPED REGION 2> + * <stack> + */ +static void __damon_va_init_regions(struct damon_ctx *ctx, + struct damon_target *t) +{ + struct damon_target *ti; + struct damon_region *r; + struct damon_addr_range regions[3]; + unsigned long sz = 0, nr_pieces; + int i, tidx = 0; + + if (damon_va_three_regions(t, regions)) { + damon_for_each_target(ti, ctx) { + if (ti == t) + break; + tidx++; + } + pr_debug("Failed to get three regions of %dth target\n", tidx); + return; + } + + for (i = 0; i < 3; i++) + sz += regions[i].end - regions[i].start; + if (ctx->attrs.min_nr_regions) + sz /= ctx->attrs.min_nr_regions; + if (sz < DAMON_MIN_REGION) + sz = DAMON_MIN_REGION; + + /* Set the initial three regions of the target */ + for (i = 0; i < 3; i++) { + r = damon_new_region(regions[i].start, regions[i].end); + if (!r) { + pr_err("%d'th init region creation failed\n", i); + return; + } + damon_add_region(r, t); + + nr_pieces = (regions[i].end - regions[i].start) / sz; + damon_va_evenly_split_region(t, r, nr_pieces); + } +} + +/* Initialize '->regions_list' of every target (task) */ +static void damon_va_init(struct damon_ctx *ctx) +{ + struct damon_target *t; + + damon_for_each_target(t, ctx) { + /* the user may set the target regions as they want */ + if (!damon_nr_regions(t)) + __damon_va_init_regions(ctx, t); + } +} + +/* + * Update regions for current memory mappings + */ +static void damon_va_update(struct damon_ctx *ctx) +{ + struct damon_addr_range three_regions[3]; + struct damon_target *t; + + damon_for_each_target(t, ctx) { + if (damon_va_three_regions(t, three_regions)) + continue; + damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION); + } +} + +static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t *pte; + spinlock_t *ptl; + + ptl = pmd_trans_huge_lock(pmd, walk->vma); + if (ptl) { + pmd_t pmde = pmdp_get(pmd); + + if (pmd_present(pmde)) + damon_pmdp_mkold(pmd, walk->vma, addr); + spin_unlock(ptl); + return 0; + } + + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte) + return 0; + if (!pte_present(ptep_get(pte))) + goto out; + damon_ptep_mkold(pte, walk->vma, addr); +out: + pte_unmap_unlock(pte, ptl); + return 0; +} + +#ifdef CONFIG_HUGETLB_PAGE +static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long addr) +{ + bool referenced = false; + pte_t entry = huge_ptep_get(mm, addr, pte); + struct folio *folio = pfn_folio(pte_pfn(entry)); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + folio_get(folio); + + if (pte_young(entry)) { + referenced = true; + entry = pte_mkold(entry); + set_huge_pte_at(mm, addr, pte, entry, psize); + } + + if (mmu_notifier_clear_young(mm, addr, + addr + huge_page_size(hstate_vma(vma)))) + referenced = true; + + if (referenced) + folio_set_young(folio); + + folio_set_idle(folio); + folio_put(folio); +} + +static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct hstate *h = hstate_vma(walk->vma); + spinlock_t *ptl; + pte_t entry; + + ptl = huge_pte_lock(h, walk->mm, pte); + entry = huge_ptep_get(walk->mm, addr, pte); + if (!pte_present(entry)) + goto out; + + damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr); + +out: + spin_unlock(ptl); + return 0; +} +#else +#define damon_mkold_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ + +static const struct mm_walk_ops damon_mkold_ops = { + .pmd_entry = damon_mkold_pmd_entry, + .hugetlb_entry = damon_mkold_hugetlb_entry, + .walk_lock = PGWALK_RDLOCK, +}; + +static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) +{ + mmap_read_lock(mm); + walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); + mmap_read_unlock(mm); +} + +/* + * Functions for the access checking of the regions + */ + +static void __damon_va_prepare_access_check(struct mm_struct *mm, + struct damon_region *r) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_va_mkold(mm, r->sampling_addr); +} + +static void damon_va_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct mm_struct *mm; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + mm = damon_get_mm(t); + if (!mm) + continue; + damon_for_each_region(r, t) + __damon_va_prepare_access_check(mm, r); + mmput(mm); + } +} + +struct damon_young_walk_private { + /* size of the folio for the access checked virtual memory address */ + unsigned long *folio_sz; + bool young; +}; + +static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t *pte; + pte_t ptent; + spinlock_t *ptl; + struct folio *folio; + struct damon_young_walk_private *priv = walk->private; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + ptl = pmd_trans_huge_lock(pmd, walk->vma); + if (ptl) { + pmd_t pmde = pmdp_get(pmd); + + if (!pmd_present(pmde)) + goto huge_out; + folio = vm_normal_folio_pmd(walk->vma, addr, pmde); + if (!folio) + goto huge_out; + if (pmd_young(pmde) || !folio_test_idle(folio) || + mmu_notifier_test_young(walk->mm, + addr)) + priv->young = true; + *priv->folio_sz = HPAGE_PMD_SIZE; +huge_out: + spin_unlock(ptl); + return 0; + } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte) + return 0; + ptent = ptep_get(pte); + if (!pte_present(ptent)) + goto out; + folio = vm_normal_folio(walk->vma, addr, ptent); + if (!folio) + goto out; + if (pte_young(ptent) || !folio_test_idle(folio) || + mmu_notifier_test_young(walk->mm, addr)) + priv->young = true; + *priv->folio_sz = folio_size(folio); +out: + pte_unmap_unlock(pte, ptl); + return 0; +} + +#ifdef CONFIG_HUGETLB_PAGE +static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct damon_young_walk_private *priv = walk->private; + struct hstate *h = hstate_vma(walk->vma); + struct folio *folio; + spinlock_t *ptl; + pte_t entry; + + ptl = huge_pte_lock(h, walk->mm, pte); + entry = huge_ptep_get(walk->mm, addr, pte); + if (!pte_present(entry)) + goto out; + + folio = pfn_folio(pte_pfn(entry)); + folio_get(folio); + + if (pte_young(entry) || !folio_test_idle(folio) || + mmu_notifier_test_young(walk->mm, addr)) + priv->young = true; + *priv->folio_sz = huge_page_size(h); + + folio_put(folio); + +out: + spin_unlock(ptl); + return 0; +} +#else +#define damon_young_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ + +static const struct mm_walk_ops damon_young_ops = { + .pmd_entry = damon_young_pmd_entry, + .hugetlb_entry = damon_young_hugetlb_entry, + .walk_lock = PGWALK_RDLOCK, +}; + +static bool damon_va_young(struct mm_struct *mm, unsigned long addr, + unsigned long *folio_sz) +{ + struct damon_young_walk_private arg = { + .folio_sz = folio_sz, + .young = false, + }; + + mmap_read_lock(mm); + walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); + mmap_read_unlock(mm); + return arg.young; +} + +/* + * Check whether the region was accessed after the last preparation + * + * mm 'mm_struct' for the given virtual address space + * r the region to be checked + */ +static void __damon_va_check_access(struct mm_struct *mm, + struct damon_region *r, bool same_target, + struct damon_attrs *attrs) +{ + static unsigned long last_addr; + static unsigned long last_folio_sz = PAGE_SIZE; + static bool last_accessed; + + if (!mm) { + damon_update_region_access_rate(r, false, attrs); + return; + } + + /* If the region is in the last checked page, reuse the result */ + if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) == + ALIGN_DOWN(r->sampling_addr, last_folio_sz))) { + damon_update_region_access_rate(r, last_accessed, attrs); + return; + } + + last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz); + damon_update_region_access_rate(r, last_accessed, attrs); + + last_addr = r->sampling_addr; +} + +static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct mm_struct *mm; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + bool same_target; + + damon_for_each_target(t, ctx) { + mm = damon_get_mm(t); + same_target = false; + damon_for_each_region(r, t) { + __damon_va_check_access(mm, r, same_target, + &ctx->attrs); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + same_target = true; + } + if (mm) + mmput(mm); + } + + return max_nr_accesses; +} + +static bool damos_va_filter_young_match(struct damos_filter *filter, + struct folio *folio, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, pmd_t *pmdp) +{ + bool young = false; + + if (ptep) + young = pte_young(ptep_get(ptep)); + else if (pmdp) + young = pmd_young(pmdp_get(pmdp)); + + young = young || !folio_test_idle(folio) || + mmu_notifier_test_young(vma->vm_mm, addr); + + if (young && ptep) + damon_ptep_mkold(ptep, vma, addr); + else if (young && pmdp) + damon_pmdp_mkold(pmdp, vma, addr); + + return young == filter->matching; +} + +static bool damos_va_filter_out(struct damos *scheme, struct folio *folio, + struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pmd_t *pmdp) +{ + struct damos_filter *filter; + bool matched; + + if (scheme->core_filters_allowed) + return false; + + damos_for_each_ops_filter(filter, scheme) { + /* + * damos_folio_filter_match checks the young filter by doing an + * rmap on the folio to find its page table. However, being the + * vaddr scheme, we have direct access to the page tables, so + * use that instead. + */ + if (filter->type == DAMOS_FILTER_TYPE_YOUNG) + matched = damos_va_filter_young_match(filter, folio, + vma, addr, ptep, pmdp); + else + matched = damos_folio_filter_match(filter, folio); + + if (matched) + return !filter->allow; + } + return scheme->ops_filters_default_reject; +} + +struct damos_va_migrate_private { + struct list_head *migration_lists; + struct damos *scheme; +}; + +/* + * Place the given folio in the migration_list corresponding to where the folio + * should be migrated. + * + * The algorithm used here is similar to weighted_interleave_nid() + */ +static void damos_va_migrate_dests_add(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr, + struct damos_migrate_dests *dests, + struct list_head *migration_lists) +{ + pgoff_t ilx; + int order; + unsigned int target; + unsigned int weight_total = 0; + int i; + + /* + * If dests is empty, there is only one migration list corresponding + * to s->target_nid. + */ + if (!dests->nr_dests) { + i = 0; + goto isolate; + } + + order = folio_order(folio); + ilx = vma->vm_pgoff >> order; + ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); + + for (i = 0; i < dests->nr_dests; i++) + weight_total += dests->weight_arr[i]; + + /* If the total weights are somehow 0, don't migrate at all */ + if (!weight_total) + return; + + target = ilx % weight_total; + for (i = 0; i < dests->nr_dests; i++) { + if (target < dests->weight_arr[i]) + break; + target -= dests->weight_arr[i]; + } + + /* If the folio is already in the right node, don't do anything */ + if (folio_nid(folio) == dests->node_id_arr[i]) + return; + +isolate: + if (!folio_isolate_lru(folio)) + return; + + list_add(&folio->lru, &migration_lists[i]); +} + +static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct damos_va_migrate_private *priv = walk->private; + struct list_head *migration_lists = priv->migration_lists; + struct damos *s = priv->scheme; + struct damos_migrate_dests *dests = &s->migrate_dests; + struct folio *folio; + spinlock_t *ptl; + pte_t *start_pte, *pte, ptent; + int nr; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + ptl = pmd_trans_huge_lock(pmd, walk->vma); + if (ptl) { + pmd_t pmde = pmdp_get(pmd); + + if (!pmd_present(pmde)) + goto huge_out; + folio = vm_normal_folio_pmd(walk->vma, addr, pmde); + if (!folio) + goto huge_out; + if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd)) + goto huge_out; + damos_va_migrate_dests_add(folio, walk->vma, addr, dests, + migration_lists); +huge_out: + spin_unlock(ptl); + return 0; + } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte) + return 0; + + for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) { + nr = 1; + ptent = ptep_get(pte); + + if (pte_none(ptent) || !pte_present(ptent)) + continue; + folio = vm_normal_folio(walk->vma, addr, ptent); + if (!folio) + continue; + if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL)) + return 0; + damos_va_migrate_dests_add(folio, walk->vma, addr, dests, + migration_lists); + nr = folio_nr_pages(folio); + } + pte_unmap_unlock(start_pte, ptl); + return 0; +} + +/* + * Functions for the target validity check and cleanup + */ + +static bool damon_va_target_valid(struct damon_target *t) +{ + struct task_struct *task; + + task = damon_get_task_struct(t); + if (task) { + put_task_struct(task); + return true; + } + + return false; +} + +static void damon_va_cleanup_target(struct damon_target *t) +{ + put_pid(t->pid); +} + +#ifndef CONFIG_ADVISE_SYSCALLS +static unsigned long damos_madvise(struct damon_target *target, + struct damon_region *r, int behavior) +{ + return 0; +} +#else +static unsigned long damos_madvise(struct damon_target *target, + struct damon_region *r, int behavior) +{ + struct mm_struct *mm; + unsigned long start = PAGE_ALIGN(r->ar.start); + unsigned long len = PAGE_ALIGN(damon_sz_region(r)); + unsigned long applied; + + mm = damon_get_mm(target); + if (!mm) + return 0; + + applied = do_madvise(mm, start, len, behavior) ? 0 : len; + mmput(mm); + + return applied; +} +#endif /* CONFIG_ADVISE_SYSCALLS */ + +static unsigned long damos_va_migrate(struct damon_target *target, + struct damon_region *r, struct damos *s, + unsigned long *sz_filter_passed) +{ + LIST_HEAD(folio_list); + struct damos_va_migrate_private priv; + struct mm_struct *mm; + int nr_dests; + int nid; + bool use_target_nid; + unsigned long applied = 0; + struct damos_migrate_dests *dests = &s->migrate_dests; + struct mm_walk_ops walk_ops = { + .pmd_entry = damos_va_migrate_pmd_entry, + .pte_entry = NULL, + .walk_lock = PGWALK_RDLOCK, + }; + + use_target_nid = dests->nr_dests == 0; + nr_dests = use_target_nid ? 1 : dests->nr_dests; + priv.scheme = s; + priv.migration_lists = kmalloc_array(nr_dests, + sizeof(*priv.migration_lists), GFP_KERNEL); + if (!priv.migration_lists) + return 0; + + for (int i = 0; i < nr_dests; i++) + INIT_LIST_HEAD(&priv.migration_lists[i]); + + + mm = damon_get_mm(target); + if (!mm) + goto free_lists; + + mmap_read_lock(mm); + walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv); + mmap_read_unlock(mm); + mmput(mm); + + for (int i = 0; i < nr_dests; i++) { + nid = use_target_nid ? s->target_nid : dests->node_id_arr[i]; + applied += damon_migrate_pages(&priv.migration_lists[i], nid); + cond_resched(); + } + +free_lists: + kfree(priv.migration_lists); + return applied * PAGE_SIZE; +} + +struct damos_va_stat_private { + struct damos *scheme; + unsigned long *sz_filter_passed; +}; + +static inline bool damos_va_invalid_folio(struct folio *folio, + struct damos *s) +{ + return !folio || folio == s->last_applied; +} + +static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct damos_va_stat_private *priv = walk->private; + struct damos *s = priv->scheme; + unsigned long *sz_filter_passed = priv->sz_filter_passed; + struct vm_area_struct *vma = walk->vma; + struct folio *folio; + spinlock_t *ptl; + pte_t *start_pte, *pte, ptent; + int nr; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + ptl = pmd_trans_huge_lock(pmd, vma); + if (ptl) { + pmd_t pmde = pmdp_get(pmd); + + if (!pmd_present(pmde)) + goto huge_unlock; + + folio = vm_normal_folio_pmd(vma, addr, pmde); + + if (damos_va_invalid_folio(folio, s)) + goto huge_unlock; + + if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd)) + *sz_filter_passed += folio_size(folio); + s->last_applied = folio; + +huge_unlock: + spin_unlock(ptl); + return 0; + } +#endif + start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (!start_pte) + return 0; + + for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) { + nr = 1; + ptent = ptep_get(pte); + + if (pte_none(ptent) || !pte_present(ptent)) + continue; + + folio = vm_normal_folio(vma, addr, ptent); + + if (damos_va_invalid_folio(folio, s)) + continue; + + if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL)) + *sz_filter_passed += folio_size(folio); + nr = folio_nr_pages(folio); + s->last_applied = folio; + } + pte_unmap_unlock(start_pte, ptl); + return 0; +} + +static unsigned long damos_va_stat(struct damon_target *target, + struct damon_region *r, struct damos *s, + unsigned long *sz_filter_passed) +{ + struct damos_va_stat_private priv; + struct mm_struct *mm; + struct mm_walk_ops walk_ops = { + .pmd_entry = damos_va_stat_pmd_entry, + .walk_lock = PGWALK_RDLOCK, + }; + + priv.scheme = s; + priv.sz_filter_passed = sz_filter_passed; + + if (!damos_ops_has_filter(s)) + return 0; + + mm = damon_get_mm(target); + if (!mm) + return 0; + + mmap_read_lock(mm); + walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv); + mmap_read_unlock(mm); + mmput(mm); + return 0; +} + +static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *scheme, unsigned long *sz_filter_passed) +{ + int madv_action; + + switch (scheme->action) { + case DAMOS_WILLNEED: + madv_action = MADV_WILLNEED; + break; + case DAMOS_COLD: + madv_action = MADV_COLD; + break; + case DAMOS_PAGEOUT: + madv_action = MADV_PAGEOUT; + break; + case DAMOS_HUGEPAGE: + madv_action = MADV_HUGEPAGE; + break; + case DAMOS_NOHUGEPAGE: + madv_action = MADV_NOHUGEPAGE; + break; + case DAMOS_MIGRATE_HOT: + case DAMOS_MIGRATE_COLD: + return damos_va_migrate(t, r, scheme, sz_filter_passed); + case DAMOS_STAT: + return damos_va_stat(t, r, scheme, sz_filter_passed); + default: + /* + * DAMOS actions that are not yet supported by 'vaddr'. + */ + return 0; + } + + return damos_madvise(t, r, madv_action); +} + +static int damon_va_scheme_score(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_cold_score(context, r, scheme); + case DAMOS_MIGRATE_HOT: + return damon_hot_score(context, r, scheme); + case DAMOS_MIGRATE_COLD: + return damon_cold_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + +static int __init damon_va_initcall(void) +{ + struct damon_operations ops = { + .id = DAMON_OPS_VADDR, + .init = damon_va_init, + .update = damon_va_update, + .prepare_access_checks = damon_va_prepare_access_checks, + .check_accesses = damon_va_check_accesses, + .target_valid = damon_va_target_valid, + .cleanup_target = damon_va_cleanup_target, + .cleanup = NULL, + .apply_scheme = damon_va_apply_scheme, + .get_scheme_score = damon_va_scheme_score, + }; + /* ops for fixed virtual address ranges */ + struct damon_operations ops_fvaddr = ops; + int err; + + /* Don't set the monitoring target regions for the entire mapping */ + ops_fvaddr.id = DAMON_OPS_FVADDR; + ops_fvaddr.init = NULL; + ops_fvaddr.update = NULL; + + err = damon_register_ops(&ops); + if (err) + return err; + return damon_register_ops(&ops_fvaddr); +}; + +subsys_initcall(damon_va_initcall); + +#include "tests/vaddr-kunit.h" |
