summaryrefslogtreecommitdiff
path: root/mm/damon
diff options
context:
space:
mode:
Diffstat (limited to 'mm/damon')
-rw-r--r--mm/damon/Kconfig113
-rw-r--r--mm/damon/Makefile9
-rw-r--r--mm/damon/core.c2941
-rw-r--r--mm/damon/lru_sort.c394
-rw-r--r--mm/damon/modules-common.c42
-rw-r--r--mm/damon/modules-common.h49
-rw-r--r--mm/damon/ops-common.c429
-rw-r--r--mm/damon/ops-common.h25
-rw-r--r--mm/damon/paddr.c386
-rw-r--r--mm/damon/reclaim.c398
-rw-r--r--mm/damon/stat.c276
-rw-r--r--mm/damon/sysfs-common.c107
-rw-r--r--mm/damon/sysfs-common.h61
-rw-r--r--mm/damon/sysfs-schemes.c2832
-rw-r--r--mm/damon/sysfs.c2108
-rw-r--r--mm/damon/tests/.kunitconfig15
-rw-r--r--mm/damon/tests/core-kunit.h1249
-rw-r--r--mm/damon/tests/sysfs-kunit.h112
-rw-r--r--mm/damon/tests/vaddr-kunit.h348
-rw-r--r--mm/damon/vaddr.c1038
20 files changed, 12932 insertions, 0 deletions
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
new file mode 100644
index 000000000000..8c868f7035fc
--- /dev/null
+++ b/mm/damon/Kconfig
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+menu "Data Access Monitoring"
+
+config DAMON
+ bool "DAMON: Data Access Monitoring Framework"
+ help
+ This builds a framework that allows kernel subsystems to monitor
+ access frequency of each memory region. The information can be useful
+ for performance-centric DRAM level memory management.
+
+ See https://www.kernel.org/doc/html/latest/mm/damon/index.html for
+ more information.
+
+config DAMON_KUNIT_TEST
+ bool "Test for damon" if !KUNIT_ALL_TESTS
+ depends on DAMON && KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the DAMON Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
+config DAMON_VADDR
+ bool "Data access monitoring operations for virtual address spaces"
+ depends on DAMON && MMU
+ select PAGE_IDLE_FLAG
+ default DAMON
+ help
+ This builds the default data access monitoring operations for DAMON
+ that work for virtual address spaces.
+
+config DAMON_PADDR
+ bool "Data access monitoring operations for the physical address space"
+ depends on DAMON && MMU
+ select PAGE_IDLE_FLAG
+ default DAMON
+ help
+ This builds the default data access monitoring operations for DAMON
+ that works for the physical address space.
+
+config DAMON_VADDR_KUNIT_TEST
+ bool "Test for DAMON operations" if !KUNIT_ALL_TESTS
+ depends on DAMON_VADDR && KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the DAMON virtual addresses operations Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
+config DAMON_SYSFS
+ bool "DAMON sysfs interface"
+ depends on DAMON && SYSFS
+ default DAMON
+ help
+ This builds the sysfs interface for DAMON. The user space can use
+ the interface for arbitrary data access monitoring.
+
+config DAMON_SYSFS_KUNIT_TEST
+ bool "Test for damon sysfs interface" if !KUNIT_ALL_TESTS
+ depends on DAMON_SYSFS && KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the DAMON sysfs interface Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
+config DAMON_RECLAIM
+ bool "Build DAMON-based reclaim (DAMON_RECLAIM)"
+ depends on DAMON_PADDR
+ help
+ This builds the DAMON-based reclamation subsystem. It finds pages
+ that not accessed for a long time (cold) using DAMON and reclaim
+ those.
+
+ This is suggested to be used as a proactive and lightweight
+ reclamation under light memory pressure, while the traditional page
+ scanning-based reclamation is used for heavy pressure.
+
+config DAMON_LRU_SORT
+ bool "Build DAMON-based LRU-lists sorting (DAMON_LRU_SORT)"
+ depends on DAMON_PADDR
+ help
+ This builds the DAMON-based LRU-lists sorting subsystem. It tries to
+ protect frequently accessed (hot) pages while rarely accessed (cold)
+ pages reclaimed first under memory pressure.
+
+config DAMON_STAT
+ bool "Build data access monitoring stat (DAMON_STAT)"
+ depends on DAMON_PADDR
+ help
+ This builds the DAMON-based access monitoring statistics subsystem.
+ It runs DAMON and expose access monitoring results in simple stat
+ metrics.
+
+config DAMON_STAT_ENABLED_DEFAULT
+ bool "Enable DAMON_STAT by default"
+ depends on DAMON_STAT
+ default DAMON_STAT
+ help
+ Whether to enable DAMON_STAT by default. Users can disable it in
+ boot or runtime using its 'enabled' parameter.
+
+endmenu
diff --git a/mm/damon/Makefile b/mm/damon/Makefile
new file mode 100644
index 000000000000..d8d6bf5f8bff
--- /dev/null
+++ b/mm/damon/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := core.o
+obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o
+obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o
+obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs-schemes.o sysfs.o
+obj-$(CONFIG_DAMON_RECLAIM) += modules-common.o reclaim.o
+obj-$(CONFIG_DAMON_LRU_SORT) += modules-common.o lru_sort.o
+obj-$(CONFIG_DAMON_STAT) += modules-common.o stat.o
diff --git a/mm/damon/core.c b/mm/damon/core.c
new file mode 100644
index 000000000000..f9fc0375890a
--- /dev/null
+++ b/mm/damon/core.c
@@ -0,0 +1,2941 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data Access Monitor
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon: " fmt
+
+#include <linux/damon.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/memcontrol.h>
+#include <linux/mm.h>
+#include <linux/psi.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_choices.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/damon.h>
+
+static DEFINE_MUTEX(damon_lock);
+static int nr_running_ctxs;
+static bool running_exclusive_ctxs;
+
+static DEFINE_MUTEX(damon_ops_lock);
+static struct damon_operations damon_registered_ops[NR_DAMON_OPS];
+
+static struct kmem_cache *damon_region_cache __ro_after_init;
+
+/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
+static bool __damon_is_registered_ops(enum damon_ops_id id)
+{
+ struct damon_operations empty_ops = {};
+
+ if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
+ return false;
+ return true;
+}
+
+/**
+ * damon_is_registered_ops() - Check if a given damon_operations is registered.
+ * @id: Id of the damon_operations to check if registered.
+ *
+ * Return: true if the ops is set, false otherwise.
+ */
+bool damon_is_registered_ops(enum damon_ops_id id)
+{
+ bool registered;
+
+ if (id >= NR_DAMON_OPS)
+ return false;
+ mutex_lock(&damon_ops_lock);
+ registered = __damon_is_registered_ops(id);
+ mutex_unlock(&damon_ops_lock);
+ return registered;
+}
+
+/**
+ * damon_register_ops() - Register a monitoring operations set to DAMON.
+ * @ops: monitoring operations set to register.
+ *
+ * This function registers a monitoring operations set of valid &struct
+ * damon_operations->id so that others can find and use them later.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_register_ops(struct damon_operations *ops)
+{
+ int err = 0;
+
+ if (ops->id >= NR_DAMON_OPS)
+ return -EINVAL;
+
+ mutex_lock(&damon_ops_lock);
+ /* Fail for already registered ops */
+ if (__damon_is_registered_ops(ops->id))
+ err = -EINVAL;
+ else
+ damon_registered_ops[ops->id] = *ops;
+ mutex_unlock(&damon_ops_lock);
+ return err;
+}
+
+/**
+ * damon_select_ops() - Select a monitoring operations to use with the context.
+ * @ctx: monitoring context to use the operations.
+ * @id: id of the registered monitoring operations to select.
+ *
+ * This function finds registered monitoring operations set of @id and make
+ * @ctx to use it.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
+{
+ int err = 0;
+
+ if (id >= NR_DAMON_OPS)
+ return -EINVAL;
+
+ mutex_lock(&damon_ops_lock);
+ if (!__damon_is_registered_ops(id))
+ err = -EINVAL;
+ else
+ ctx->ops = damon_registered_ops[id];
+ mutex_unlock(&damon_ops_lock);
+ return err;
+}
+
+/*
+ * Construct a damon_region struct
+ *
+ * Returns the pointer to the new struct if success, or NULL otherwise
+ */
+struct damon_region *damon_new_region(unsigned long start, unsigned long end)
+{
+ struct damon_region *region;
+
+ region = kmem_cache_alloc(damon_region_cache, GFP_KERNEL);
+ if (!region)
+ return NULL;
+
+ region->ar.start = start;
+ region->ar.end = end;
+ region->nr_accesses = 0;
+ region->nr_accesses_bp = 0;
+ INIT_LIST_HEAD(&region->list);
+
+ region->age = 0;
+ region->last_nr_accesses = 0;
+
+ return region;
+}
+
+void damon_add_region(struct damon_region *r, struct damon_target *t)
+{
+ list_add_tail(&r->list, &t->regions_list);
+ t->nr_regions++;
+}
+
+static void damon_del_region(struct damon_region *r, struct damon_target *t)
+{
+ list_del(&r->list);
+ t->nr_regions--;
+}
+
+static void damon_free_region(struct damon_region *r)
+{
+ kmem_cache_free(damon_region_cache, r);
+}
+
+void damon_destroy_region(struct damon_region *r, struct damon_target *t)
+{
+ damon_del_region(r, t);
+ damon_free_region(r);
+}
+
+/*
+ * Check whether a region is intersecting an address range
+ *
+ * Returns true if it is.
+ */
+static bool damon_intersect(struct damon_region *r,
+ struct damon_addr_range *re)
+{
+ return !(r->ar.end <= re->start || re->end <= r->ar.start);
+}
+
+/*
+ * Fill holes in regions with new regions.
+ */
+static int damon_fill_regions_holes(struct damon_region *first,
+ struct damon_region *last, struct damon_target *t)
+{
+ struct damon_region *r = first;
+
+ damon_for_each_region_from(r, t) {
+ struct damon_region *next, *newr;
+
+ if (r == last)
+ break;
+ next = damon_next_region(r);
+ if (r->ar.end != next->ar.start) {
+ newr = damon_new_region(r->ar.end, next->ar.start);
+ if (!newr)
+ return -ENOMEM;
+ damon_insert_region(newr, r, next, t);
+ }
+ }
+ return 0;
+}
+
+/*
+ * damon_set_regions() - Set regions of a target for given address ranges.
+ * @t: the given target.
+ * @ranges: array of new monitoring target ranges.
+ * @nr_ranges: length of @ranges.
+ * @min_sz_region: minimum region size.
+ *
+ * This function adds new regions to, or modify existing regions of a
+ * monitoring target to fit in specific ranges.
+ *
+ * Return: 0 if success, or negative error code otherwise.
+ */
+int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
+ unsigned int nr_ranges, unsigned long min_sz_region)
+{
+ struct damon_region *r, *next;
+ unsigned int i;
+ int err;
+
+ /* Remove regions which are not in the new ranges */
+ damon_for_each_region_safe(r, next, t) {
+ for (i = 0; i < nr_ranges; i++) {
+ if (damon_intersect(r, &ranges[i]))
+ break;
+ }
+ if (i == nr_ranges)
+ damon_destroy_region(r, t);
+ }
+
+ r = damon_first_region(t);
+ /* Add new regions or resize existing regions to fit in the ranges */
+ for (i = 0; i < nr_ranges; i++) {
+ struct damon_region *first = NULL, *last, *newr;
+ struct damon_addr_range *range;
+
+ range = &ranges[i];
+ /* Get the first/last regions intersecting with the range */
+ damon_for_each_region_from(r, t) {
+ if (damon_intersect(r, range)) {
+ if (!first)
+ first = r;
+ last = r;
+ }
+ if (r->ar.start >= range->end)
+ break;
+ }
+ if (!first) {
+ /* no region intersects with this range */
+ newr = damon_new_region(
+ ALIGN_DOWN(range->start,
+ min_sz_region),
+ ALIGN(range->end, min_sz_region));
+ if (!newr)
+ return -ENOMEM;
+ damon_insert_region(newr, damon_prev_region(r), r, t);
+ } else {
+ /* resize intersecting regions to fit in this range */
+ first->ar.start = ALIGN_DOWN(range->start,
+ min_sz_region);
+ last->ar.end = ALIGN(range->end, min_sz_region);
+
+ /* fill possible holes in the range */
+ err = damon_fill_regions_holes(first, last, t);
+ if (err)
+ return err;
+ }
+ }
+ return 0;
+}
+
+struct damos_filter *damos_new_filter(enum damos_filter_type type,
+ bool matching, bool allow)
+{
+ struct damos_filter *filter;
+
+ filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return NULL;
+ filter->type = type;
+ filter->matching = matching;
+ filter->allow = allow;
+ INIT_LIST_HEAD(&filter->list);
+ return filter;
+}
+
+/**
+ * damos_filter_for_ops() - Return if the filter is ops-hndled one.
+ * @type: type of the filter.
+ *
+ * Return: true if the filter of @type needs to be handled by ops layer, false
+ * otherwise.
+ */
+bool damos_filter_for_ops(enum damos_filter_type type)
+{
+ switch (type) {
+ case DAMOS_FILTER_TYPE_ADDR:
+ case DAMOS_FILTER_TYPE_TARGET:
+ return false;
+ default:
+ break;
+ }
+ return true;
+}
+
+void damos_add_filter(struct damos *s, struct damos_filter *f)
+{
+ if (damos_filter_for_ops(f->type))
+ list_add_tail(&f->list, &s->ops_filters);
+ else
+ list_add_tail(&f->list, &s->core_filters);
+}
+
+static void damos_del_filter(struct damos_filter *f)
+{
+ list_del(&f->list);
+}
+
+static void damos_free_filter(struct damos_filter *f)
+{
+ kfree(f);
+}
+
+void damos_destroy_filter(struct damos_filter *f)
+{
+ damos_del_filter(f);
+ damos_free_filter(f);
+}
+
+struct damos_quota_goal *damos_new_quota_goal(
+ enum damos_quota_goal_metric metric,
+ unsigned long target_value)
+{
+ struct damos_quota_goal *goal;
+
+ goal = kmalloc(sizeof(*goal), GFP_KERNEL);
+ if (!goal)
+ return NULL;
+ goal->metric = metric;
+ goal->target_value = target_value;
+ INIT_LIST_HEAD(&goal->list);
+ return goal;
+}
+
+void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g)
+{
+ list_add_tail(&g->list, &q->goals);
+}
+
+static void damos_del_quota_goal(struct damos_quota_goal *g)
+{
+ list_del(&g->list);
+}
+
+static void damos_free_quota_goal(struct damos_quota_goal *g)
+{
+ kfree(g);
+}
+
+void damos_destroy_quota_goal(struct damos_quota_goal *g)
+{
+ damos_del_quota_goal(g);
+ damos_free_quota_goal(g);
+}
+
+/* initialize fields of @quota that normally API users wouldn't set */
+static struct damos_quota *damos_quota_init(struct damos_quota *quota)
+{
+ quota->esz = 0;
+ quota->total_charged_sz = 0;
+ quota->total_charged_ns = 0;
+ quota->charged_sz = 0;
+ quota->charged_from = 0;
+ quota->charge_target_from = NULL;
+ quota->charge_addr_from = 0;
+ quota->esz_bp = 0;
+ return quota;
+}
+
+struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
+ enum damos_action action,
+ unsigned long apply_interval_us,
+ struct damos_quota *quota,
+ struct damos_watermarks *wmarks,
+ int target_nid)
+{
+ struct damos *scheme;
+
+ scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
+ if (!scheme)
+ return NULL;
+ scheme->pattern = *pattern;
+ scheme->action = action;
+ scheme->apply_interval_us = apply_interval_us;
+ /*
+ * next_apply_sis will be set when kdamond starts. While kdamond is
+ * running, it will also updated when it is added to the DAMON context,
+ * or damon_attrs are updated.
+ */
+ scheme->next_apply_sis = 0;
+ scheme->walk_completed = false;
+ INIT_LIST_HEAD(&scheme->core_filters);
+ INIT_LIST_HEAD(&scheme->ops_filters);
+ scheme->stat = (struct damos_stat){};
+ INIT_LIST_HEAD(&scheme->list);
+
+ scheme->quota = *(damos_quota_init(quota));
+ /* quota.goals should be separately set by caller */
+ INIT_LIST_HEAD(&scheme->quota.goals);
+
+ scheme->wmarks = *wmarks;
+ scheme->wmarks.activated = true;
+
+ scheme->migrate_dests = (struct damos_migrate_dests){};
+ scheme->target_nid = target_nid;
+
+ return scheme;
+}
+
+static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx)
+{
+ unsigned long sample_interval = ctx->attrs.sample_interval ?
+ ctx->attrs.sample_interval : 1;
+ unsigned long apply_interval = s->apply_interval_us ?
+ s->apply_interval_us : ctx->attrs.aggr_interval;
+
+ s->next_apply_sis = ctx->passed_sample_intervals +
+ apply_interval / sample_interval;
+}
+
+void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
+{
+ list_add_tail(&s->list, &ctx->schemes);
+ damos_set_next_apply_sis(s, ctx);
+}
+
+static void damon_del_scheme(struct damos *s)
+{
+ list_del(&s->list);
+}
+
+static void damon_free_scheme(struct damos *s)
+{
+ kfree(s);
+}
+
+void damon_destroy_scheme(struct damos *s)
+{
+ struct damos_quota_goal *g, *g_next;
+ struct damos_filter *f, *next;
+
+ damos_for_each_quota_goal_safe(g, g_next, &s->quota)
+ damos_destroy_quota_goal(g);
+
+ damos_for_each_core_filter_safe(f, next, s)
+ damos_destroy_filter(f);
+
+ damos_for_each_ops_filter_safe(f, next, s)
+ damos_destroy_filter(f);
+
+ kfree(s->migrate_dests.node_id_arr);
+ kfree(s->migrate_dests.weight_arr);
+ damon_del_scheme(s);
+ damon_free_scheme(s);
+}
+
+/*
+ * Construct a damon_target struct
+ *
+ * Returns the pointer to the new struct if success, or NULL otherwise
+ */
+struct damon_target *damon_new_target(void)
+{
+ struct damon_target *t;
+
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return NULL;
+
+ t->pid = NULL;
+ t->nr_regions = 0;
+ INIT_LIST_HEAD(&t->regions_list);
+ INIT_LIST_HEAD(&t->list);
+ t->obsolete = false;
+
+ return t;
+}
+
+void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
+{
+ list_add_tail(&t->list, &ctx->adaptive_targets);
+}
+
+bool damon_targets_empty(struct damon_ctx *ctx)
+{
+ return list_empty(&ctx->adaptive_targets);
+}
+
+static void damon_del_target(struct damon_target *t)
+{
+ list_del(&t->list);
+}
+
+void damon_free_target(struct damon_target *t)
+{
+ struct damon_region *r, *next;
+
+ damon_for_each_region_safe(r, next, t)
+ damon_free_region(r);
+ kfree(t);
+}
+
+void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx)
+{
+
+ if (ctx && ctx->ops.cleanup_target)
+ ctx->ops.cleanup_target(t);
+
+ damon_del_target(t);
+ damon_free_target(t);
+}
+
+unsigned int damon_nr_regions(struct damon_target *t)
+{
+ return t->nr_regions;
+}
+
+struct damon_ctx *damon_new_ctx(void)
+{
+ struct damon_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+
+ init_completion(&ctx->kdamond_started);
+
+ ctx->attrs.sample_interval = 5 * 1000;
+ ctx->attrs.aggr_interval = 100 * 1000;
+ ctx->attrs.ops_update_interval = 60 * 1000 * 1000;
+
+ ctx->passed_sample_intervals = 0;
+ /* These will be set from kdamond_init_ctx() */
+ ctx->next_aggregation_sis = 0;
+ ctx->next_ops_update_sis = 0;
+
+ mutex_init(&ctx->kdamond_lock);
+ INIT_LIST_HEAD(&ctx->call_controls);
+ mutex_init(&ctx->call_controls_lock);
+ mutex_init(&ctx->walk_control_lock);
+
+ ctx->attrs.min_nr_regions = 10;
+ ctx->attrs.max_nr_regions = 1000;
+
+ ctx->addr_unit = 1;
+ ctx->min_sz_region = DAMON_MIN_REGION;
+
+ INIT_LIST_HEAD(&ctx->adaptive_targets);
+ INIT_LIST_HEAD(&ctx->schemes);
+
+ return ctx;
+}
+
+static void damon_destroy_targets(struct damon_ctx *ctx)
+{
+ struct damon_target *t, *next_t;
+
+ damon_for_each_target_safe(t, next_t, ctx)
+ damon_destroy_target(t, ctx);
+}
+
+void damon_destroy_ctx(struct damon_ctx *ctx)
+{
+ struct damos *s, *next_s;
+
+ damon_destroy_targets(ctx);
+
+ damon_for_each_scheme_safe(s, next_s, ctx)
+ damon_destroy_scheme(s);
+
+ kfree(ctx);
+}
+
+static bool damon_attrs_equals(const struct damon_attrs *attrs1,
+ const struct damon_attrs *attrs2)
+{
+ const struct damon_intervals_goal *ig1 = &attrs1->intervals_goal;
+ const struct damon_intervals_goal *ig2 = &attrs2->intervals_goal;
+
+ return attrs1->sample_interval == attrs2->sample_interval &&
+ attrs1->aggr_interval == attrs2->aggr_interval &&
+ attrs1->ops_update_interval == attrs2->ops_update_interval &&
+ attrs1->min_nr_regions == attrs2->min_nr_regions &&
+ attrs1->max_nr_regions == attrs2->max_nr_regions &&
+ ig1->access_bp == ig2->access_bp &&
+ ig1->aggrs == ig2->aggrs &&
+ ig1->min_sample_us == ig2->min_sample_us &&
+ ig1->max_sample_us == ig2->max_sample_us;
+}
+
+static unsigned int damon_age_for_new_attrs(unsigned int age,
+ struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
+{
+ return age * old_attrs->aggr_interval / new_attrs->aggr_interval;
+}
+
+/* convert access ratio in bp (per 10,000) to nr_accesses */
+static unsigned int damon_accesses_bp_to_nr_accesses(
+ unsigned int accesses_bp, struct damon_attrs *attrs)
+{
+ return accesses_bp * damon_max_nr_accesses(attrs) / 10000;
+}
+
+/*
+ * Convert nr_accesses to access ratio in bp (per 10,000).
+ *
+ * Callers should ensure attrs.aggr_interval is not zero, like
+ * damon_update_monitoring_results() does . Otherwise, divide-by-zero would
+ * happen.
+ */
+static unsigned int damon_nr_accesses_to_accesses_bp(
+ unsigned int nr_accesses, struct damon_attrs *attrs)
+{
+ return nr_accesses * 10000 / damon_max_nr_accesses(attrs);
+}
+
+static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
+ struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
+{
+ return damon_accesses_bp_to_nr_accesses(
+ damon_nr_accesses_to_accesses_bp(
+ nr_accesses, old_attrs),
+ new_attrs);
+}
+
+static void damon_update_monitoring_result(struct damon_region *r,
+ struct damon_attrs *old_attrs, struct damon_attrs *new_attrs,
+ bool aggregating)
+{
+ if (!aggregating) {
+ r->nr_accesses = damon_nr_accesses_for_new_attrs(
+ r->nr_accesses, old_attrs, new_attrs);
+ r->nr_accesses_bp = r->nr_accesses * 10000;
+ } else {
+ /*
+ * if this is called in the middle of the aggregation, reset
+ * the aggregations we made so far for this aggregation
+ * interval. In other words, make the status like
+ * kdamond_reset_aggregated() is called.
+ */
+ r->last_nr_accesses = damon_nr_accesses_for_new_attrs(
+ r->last_nr_accesses, old_attrs, new_attrs);
+ r->nr_accesses_bp = r->last_nr_accesses * 10000;
+ r->nr_accesses = 0;
+ }
+ r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs);
+}
+
+/*
+ * region->nr_accesses is the number of sampling intervals in the last
+ * aggregation interval that access to the region has found, and region->age is
+ * the number of aggregation intervals that its access pattern has maintained.
+ * For the reason, the real meaning of the two fields depend on current
+ * sampling interval and aggregation interval. This function updates
+ * ->nr_accesses and ->age of given damon_ctx's regions for new damon_attrs.
+ */
+static void damon_update_monitoring_results(struct damon_ctx *ctx,
+ struct damon_attrs *new_attrs, bool aggregating)
+{
+ struct damon_attrs *old_attrs = &ctx->attrs;
+ struct damon_target *t;
+ struct damon_region *r;
+
+ /* if any interval is zero, simply forgive conversion */
+ if (!old_attrs->sample_interval || !old_attrs->aggr_interval ||
+ !new_attrs->sample_interval ||
+ !new_attrs->aggr_interval)
+ return;
+
+ damon_for_each_target(t, ctx)
+ damon_for_each_region(r, t)
+ damon_update_monitoring_result(
+ r, old_attrs, new_attrs, aggregating);
+}
+
+/*
+ * damon_valid_intervals_goal() - return if the intervals goal of @attrs is
+ * valid.
+ */
+static bool damon_valid_intervals_goal(struct damon_attrs *attrs)
+{
+ struct damon_intervals_goal *goal = &attrs->intervals_goal;
+
+ /* tuning is disabled */
+ if (!goal->aggrs)
+ return true;
+ if (goal->min_sample_us > goal->max_sample_us)
+ return false;
+ if (attrs->sample_interval < goal->min_sample_us ||
+ goal->max_sample_us < attrs->sample_interval)
+ return false;
+ return true;
+}
+
+/**
+ * damon_set_attrs() - Set attributes for the monitoring.
+ * @ctx: monitoring context
+ * @attrs: monitoring attributes
+ *
+ * This function should be called while the kdamond is not running, an access
+ * check results aggregation is not ongoing (e.g., from damon_call().
+ *
+ * Every time interval is in micro-seconds.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
+{
+ unsigned long sample_interval = attrs->sample_interval ?
+ attrs->sample_interval : 1;
+ struct damos *s;
+ bool aggregating = ctx->passed_sample_intervals <
+ ctx->next_aggregation_sis;
+
+ if (!damon_valid_intervals_goal(attrs))
+ return -EINVAL;
+
+ if (attrs->min_nr_regions < 3)
+ return -EINVAL;
+ if (attrs->min_nr_regions > attrs->max_nr_regions)
+ return -EINVAL;
+ if (attrs->sample_interval > attrs->aggr_interval)
+ return -EINVAL;
+
+ /* calls from core-external doesn't set this. */
+ if (!attrs->aggr_samples)
+ attrs->aggr_samples = attrs->aggr_interval / sample_interval;
+
+ ctx->next_aggregation_sis = ctx->passed_sample_intervals +
+ attrs->aggr_interval / sample_interval;
+ ctx->next_ops_update_sis = ctx->passed_sample_intervals +
+ attrs->ops_update_interval / sample_interval;
+
+ damon_update_monitoring_results(ctx, attrs, aggregating);
+ ctx->attrs = *attrs;
+
+ damon_for_each_scheme(s, ctx)
+ damos_set_next_apply_sis(s, ctx);
+
+ return 0;
+}
+
+/**
+ * damon_set_schemes() - Set data access monitoring based operation schemes.
+ * @ctx: monitoring context
+ * @schemes: array of the schemes
+ * @nr_schemes: number of entries in @schemes
+ *
+ * This function should not be called while the kdamond of the context is
+ * running.
+ */
+void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
+ ssize_t nr_schemes)
+{
+ struct damos *s, *next;
+ ssize_t i;
+
+ damon_for_each_scheme_safe(s, next, ctx)
+ damon_destroy_scheme(s);
+ for (i = 0; i < nr_schemes; i++)
+ damon_add_scheme(ctx, schemes[i]);
+}
+
+static struct damos_quota_goal *damos_nth_quota_goal(
+ int n, struct damos_quota *q)
+{
+ struct damos_quota_goal *goal;
+ int i = 0;
+
+ damos_for_each_quota_goal(goal, q) {
+ if (i++ == n)
+ return goal;
+ }
+ return NULL;
+}
+
+static void damos_commit_quota_goal_union(
+ struct damos_quota_goal *dst, struct damos_quota_goal *src)
+{
+ switch (dst->metric) {
+ case DAMOS_QUOTA_NODE_MEM_USED_BP:
+ case DAMOS_QUOTA_NODE_MEM_FREE_BP:
+ dst->nid = src->nid;
+ break;
+ case DAMOS_QUOTA_NODE_MEMCG_USED_BP:
+ case DAMOS_QUOTA_NODE_MEMCG_FREE_BP:
+ dst->nid = src->nid;
+ dst->memcg_id = src->memcg_id;
+ break;
+ default:
+ break;
+ }
+}
+
+static void damos_commit_quota_goal(
+ struct damos_quota_goal *dst, struct damos_quota_goal *src)
+{
+ dst->metric = src->metric;
+ dst->target_value = src->target_value;
+ if (dst->metric == DAMOS_QUOTA_USER_INPUT)
+ dst->current_value = src->current_value;
+ /* keep last_psi_total as is, since it will be updated in next cycle */
+ damos_commit_quota_goal_union(dst, src);
+}
+
+/**
+ * damos_commit_quota_goals() - Commit DAMOS quota goals to another quota.
+ * @dst: The commit destination DAMOS quota.
+ * @src: The commit source DAMOS quota.
+ *
+ * Copies user-specified parameters for quota goals from @src to @dst. Users
+ * should use this function for quota goals-level parameters update of running
+ * DAMON contexts, instead of manual in-place updates.
+ *
+ * This function should be called from parameters-update safe context, like
+ * damon_call().
+ */
+int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src)
+{
+ struct damos_quota_goal *dst_goal, *next, *src_goal, *new_goal;
+ int i = 0, j = 0;
+
+ damos_for_each_quota_goal_safe(dst_goal, next, dst) {
+ src_goal = damos_nth_quota_goal(i++, src);
+ if (src_goal)
+ damos_commit_quota_goal(dst_goal, src_goal);
+ else
+ damos_destroy_quota_goal(dst_goal);
+ }
+ damos_for_each_quota_goal_safe(src_goal, next, src) {
+ if (j++ < i)
+ continue;
+ new_goal = damos_new_quota_goal(
+ src_goal->metric, src_goal->target_value);
+ if (!new_goal)
+ return -ENOMEM;
+ damos_commit_quota_goal(new_goal, src_goal);
+ damos_add_quota_goal(dst, new_goal);
+ }
+ return 0;
+}
+
+static int damos_commit_quota(struct damos_quota *dst, struct damos_quota *src)
+{
+ int err;
+
+ dst->reset_interval = src->reset_interval;
+ dst->ms = src->ms;
+ dst->sz = src->sz;
+ err = damos_commit_quota_goals(dst, src);
+ if (err)
+ return err;
+ dst->weight_sz = src->weight_sz;
+ dst->weight_nr_accesses = src->weight_nr_accesses;
+ dst->weight_age = src->weight_age;
+ return 0;
+}
+
+static struct damos_filter *damos_nth_core_filter(int n, struct damos *s)
+{
+ struct damos_filter *filter;
+ int i = 0;
+
+ damos_for_each_core_filter(filter, s) {
+ if (i++ == n)
+ return filter;
+ }
+ return NULL;
+}
+
+static struct damos_filter *damos_nth_ops_filter(int n, struct damos *s)
+{
+ struct damos_filter *filter;
+ int i = 0;
+
+ damos_for_each_ops_filter(filter, s) {
+ if (i++ == n)
+ return filter;
+ }
+ return NULL;
+}
+
+static void damos_commit_filter_arg(
+ struct damos_filter *dst, struct damos_filter *src)
+{
+ switch (dst->type) {
+ case DAMOS_FILTER_TYPE_MEMCG:
+ dst->memcg_id = src->memcg_id;
+ break;
+ case DAMOS_FILTER_TYPE_ADDR:
+ dst->addr_range = src->addr_range;
+ break;
+ case DAMOS_FILTER_TYPE_TARGET:
+ dst->target_idx = src->target_idx;
+ break;
+ case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
+ dst->sz_range = src->sz_range;
+ break;
+ default:
+ break;
+ }
+}
+
+static void damos_commit_filter(
+ struct damos_filter *dst, struct damos_filter *src)
+{
+ dst->type = src->type;
+ dst->matching = src->matching;
+ dst->allow = src->allow;
+ damos_commit_filter_arg(dst, src);
+}
+
+static int damos_commit_core_filters(struct damos *dst, struct damos *src)
+{
+ struct damos_filter *dst_filter, *next, *src_filter, *new_filter;
+ int i = 0, j = 0;
+
+ damos_for_each_core_filter_safe(dst_filter, next, dst) {
+ src_filter = damos_nth_core_filter(i++, src);
+ if (src_filter)
+ damos_commit_filter(dst_filter, src_filter);
+ else
+ damos_destroy_filter(dst_filter);
+ }
+
+ damos_for_each_core_filter_safe(src_filter, next, src) {
+ if (j++ < i)
+ continue;
+
+ new_filter = damos_new_filter(
+ src_filter->type, src_filter->matching,
+ src_filter->allow);
+ if (!new_filter)
+ return -ENOMEM;
+ damos_commit_filter_arg(new_filter, src_filter);
+ damos_add_filter(dst, new_filter);
+ }
+ return 0;
+}
+
+static int damos_commit_ops_filters(struct damos *dst, struct damos *src)
+{
+ struct damos_filter *dst_filter, *next, *src_filter, *new_filter;
+ int i = 0, j = 0;
+
+ damos_for_each_ops_filter_safe(dst_filter, next, dst) {
+ src_filter = damos_nth_ops_filter(i++, src);
+ if (src_filter)
+ damos_commit_filter(dst_filter, src_filter);
+ else
+ damos_destroy_filter(dst_filter);
+ }
+
+ damos_for_each_ops_filter_safe(src_filter, next, src) {
+ if (j++ < i)
+ continue;
+
+ new_filter = damos_new_filter(
+ src_filter->type, src_filter->matching,
+ src_filter->allow);
+ if (!new_filter)
+ return -ENOMEM;
+ damos_commit_filter_arg(new_filter, src_filter);
+ damos_add_filter(dst, new_filter);
+ }
+ return 0;
+}
+
+/**
+ * damos_filters_default_reject() - decide whether to reject memory that didn't
+ * match with any given filter.
+ * @filters: Given DAMOS filters of a group.
+ */
+static bool damos_filters_default_reject(struct list_head *filters)
+{
+ struct damos_filter *last_filter;
+
+ if (list_empty(filters))
+ return false;
+ last_filter = list_last_entry(filters, struct damos_filter, list);
+ return last_filter->allow;
+}
+
+static void damos_set_filters_default_reject(struct damos *s)
+{
+ if (!list_empty(&s->ops_filters))
+ s->core_filters_default_reject = false;
+ else
+ s->core_filters_default_reject =
+ damos_filters_default_reject(&s->core_filters);
+ s->ops_filters_default_reject =
+ damos_filters_default_reject(&s->ops_filters);
+}
+
+static int damos_commit_dests(struct damos_migrate_dests *dst,
+ struct damos_migrate_dests *src)
+{
+ if (dst->nr_dests != src->nr_dests) {
+ kfree(dst->node_id_arr);
+ kfree(dst->weight_arr);
+
+ dst->node_id_arr = kmalloc_array(src->nr_dests,
+ sizeof(*dst->node_id_arr), GFP_KERNEL);
+ if (!dst->node_id_arr) {
+ dst->weight_arr = NULL;
+ return -ENOMEM;
+ }
+
+ dst->weight_arr = kmalloc_array(src->nr_dests,
+ sizeof(*dst->weight_arr), GFP_KERNEL);
+ if (!dst->weight_arr) {
+ /* ->node_id_arr will be freed by scheme destruction */
+ return -ENOMEM;
+ }
+ }
+
+ dst->nr_dests = src->nr_dests;
+ for (int i = 0; i < src->nr_dests; i++) {
+ dst->node_id_arr[i] = src->node_id_arr[i];
+ dst->weight_arr[i] = src->weight_arr[i];
+ }
+
+ return 0;
+}
+
+static int damos_commit_filters(struct damos *dst, struct damos *src)
+{
+ int err;
+
+ err = damos_commit_core_filters(dst, src);
+ if (err)
+ return err;
+ err = damos_commit_ops_filters(dst, src);
+ if (err)
+ return err;
+ damos_set_filters_default_reject(dst);
+ return 0;
+}
+
+static struct damos *damon_nth_scheme(int n, struct damon_ctx *ctx)
+{
+ struct damos *s;
+ int i = 0;
+
+ damon_for_each_scheme(s, ctx) {
+ if (i++ == n)
+ return s;
+ }
+ return NULL;
+}
+
+static int damos_commit(struct damos *dst, struct damos *src)
+{
+ int err;
+
+ dst->pattern = src->pattern;
+ dst->action = src->action;
+ dst->apply_interval_us = src->apply_interval_us;
+
+ err = damos_commit_quota(&dst->quota, &src->quota);
+ if (err)
+ return err;
+
+ dst->wmarks = src->wmarks;
+ dst->target_nid = src->target_nid;
+
+ err = damos_commit_dests(&dst->migrate_dests, &src->migrate_dests);
+ if (err)
+ return err;
+
+ err = damos_commit_filters(dst, src);
+ return err;
+}
+
+static int damon_commit_schemes(struct damon_ctx *dst, struct damon_ctx *src)
+{
+ struct damos *dst_scheme, *next, *src_scheme, *new_scheme;
+ int i = 0, j = 0, err;
+
+ damon_for_each_scheme_safe(dst_scheme, next, dst) {
+ src_scheme = damon_nth_scheme(i++, src);
+ if (src_scheme) {
+ err = damos_commit(dst_scheme, src_scheme);
+ if (err)
+ return err;
+ } else {
+ damon_destroy_scheme(dst_scheme);
+ }
+ }
+
+ damon_for_each_scheme_safe(src_scheme, next, src) {
+ if (j++ < i)
+ continue;
+ new_scheme = damon_new_scheme(&src_scheme->pattern,
+ src_scheme->action,
+ src_scheme->apply_interval_us,
+ &src_scheme->quota, &src_scheme->wmarks,
+ NUMA_NO_NODE);
+ if (!new_scheme)
+ return -ENOMEM;
+ err = damos_commit(new_scheme, src_scheme);
+ if (err) {
+ damon_destroy_scheme(new_scheme);
+ return err;
+ }
+ damon_add_scheme(dst, new_scheme);
+ }
+ return 0;
+}
+
+static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ int i = 0;
+
+ damon_for_each_target(t, ctx) {
+ if (i++ == n)
+ return t;
+ }
+ return NULL;
+}
+
+/*
+ * The caller should ensure the regions of @src are
+ * 1. valid (end >= src) and
+ * 2. sorted by starting address.
+ *
+ * If @src has no region, @dst keeps current regions.
+ */
+static int damon_commit_target_regions(struct damon_target *dst,
+ struct damon_target *src, unsigned long src_min_sz_region)
+{
+ struct damon_region *src_region;
+ struct damon_addr_range *ranges;
+ int i = 0, err;
+
+ damon_for_each_region(src_region, src)
+ i++;
+ if (!i)
+ return 0;
+
+ ranges = kmalloc_array(i, sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN);
+ if (!ranges)
+ return -ENOMEM;
+ i = 0;
+ damon_for_each_region(src_region, src)
+ ranges[i++] = src_region->ar;
+ err = damon_set_regions(dst, ranges, i, src_min_sz_region);
+ kfree(ranges);
+ return err;
+}
+
+static int damon_commit_target(
+ struct damon_target *dst, bool dst_has_pid,
+ struct damon_target *src, bool src_has_pid,
+ unsigned long src_min_sz_region)
+{
+ int err;
+
+ err = damon_commit_target_regions(dst, src, src_min_sz_region);
+ if (err)
+ return err;
+ if (dst_has_pid)
+ put_pid(dst->pid);
+ if (src_has_pid)
+ get_pid(src->pid);
+ dst->pid = src->pid;
+ return 0;
+}
+
+static int damon_commit_targets(
+ struct damon_ctx *dst, struct damon_ctx *src)
+{
+ struct damon_target *dst_target, *next, *src_target, *new_target;
+ int i = 0, j = 0, err;
+
+ damon_for_each_target_safe(dst_target, next, dst) {
+ src_target = damon_nth_target(i++, src);
+ /*
+ * If src target is obsolete, do not commit the parameters to
+ * the dst target, and further remove the dst target.
+ */
+ if (src_target && !src_target->obsolete) {
+ err = damon_commit_target(
+ dst_target, damon_target_has_pid(dst),
+ src_target, damon_target_has_pid(src),
+ src->min_sz_region);
+ if (err)
+ return err;
+ } else {
+ struct damos *s;
+
+ damon_destroy_target(dst_target, dst);
+ damon_for_each_scheme(s, dst) {
+ if (s->quota.charge_target_from == dst_target) {
+ s->quota.charge_target_from = NULL;
+ s->quota.charge_addr_from = 0;
+ }
+ }
+ }
+ }
+
+ damon_for_each_target_safe(src_target, next, src) {
+ if (j++ < i)
+ continue;
+ /* target to remove has no matching dst */
+ if (src_target->obsolete)
+ return -EINVAL;
+ new_target = damon_new_target();
+ if (!new_target)
+ return -ENOMEM;
+ err = damon_commit_target(new_target, false,
+ src_target, damon_target_has_pid(src),
+ src->min_sz_region);
+ if (err) {
+ damon_destroy_target(new_target, NULL);
+ return err;
+ }
+ damon_add_target(dst, new_target);
+ }
+ return 0;
+}
+
+/**
+ * damon_commit_ctx() - Commit parameters of a DAMON context to another.
+ * @dst: The commit destination DAMON context.
+ * @src: The commit source DAMON context.
+ *
+ * This function copies user-specified parameters from @src to @dst and update
+ * the internal status and results accordingly. Users should use this function
+ * for context-level parameters update of running context, instead of manual
+ * in-place updates.
+ *
+ * This function should be called from parameters-update safe context, like
+ * damon_call().
+ */
+int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
+{
+ int err;
+
+ err = damon_commit_schemes(dst, src);
+ if (err)
+ return err;
+ err = damon_commit_targets(dst, src);
+ if (err)
+ return err;
+ /*
+ * schemes and targets should be updated first, since
+ * 1. damon_set_attrs() updates monitoring results of targets and
+ * next_apply_sis of schemes, and
+ * 2. ops update should be done after pid handling is done (target
+ * committing require putting pids).
+ */
+ if (!damon_attrs_equals(&dst->attrs, &src->attrs)) {
+ err = damon_set_attrs(dst, &src->attrs);
+ if (err)
+ return err;
+ }
+ dst->ops = src->ops;
+ dst->addr_unit = src->addr_unit;
+ dst->min_sz_region = src->min_sz_region;
+
+ return 0;
+}
+
+/**
+ * damon_nr_running_ctxs() - Return number of currently running contexts.
+ */
+int damon_nr_running_ctxs(void)
+{
+ int nr_ctxs;
+
+ mutex_lock(&damon_lock);
+ nr_ctxs = nr_running_ctxs;
+ mutex_unlock(&damon_lock);
+
+ return nr_ctxs;
+}
+
+/* Returns the size upper limit for each monitoring region */
+static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned long sz = 0;
+
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region(r, t)
+ sz += damon_sz_region(r);
+ }
+
+ if (ctx->attrs.min_nr_regions)
+ sz /= ctx->attrs.min_nr_regions;
+ if (sz < ctx->min_sz_region)
+ sz = ctx->min_sz_region;
+
+ return sz;
+}
+
+static int kdamond_fn(void *data);
+
+/*
+ * __damon_start() - Starts monitoring with given context.
+ * @ctx: monitoring context
+ *
+ * This function should be called while damon_lock is hold.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int __damon_start(struct damon_ctx *ctx)
+{
+ int err = -EBUSY;
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (!ctx->kdamond) {
+ err = 0;
+ reinit_completion(&ctx->kdamond_started);
+ ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
+ nr_running_ctxs);
+ if (IS_ERR(ctx->kdamond)) {
+ err = PTR_ERR(ctx->kdamond);
+ ctx->kdamond = NULL;
+ } else {
+ wait_for_completion(&ctx->kdamond_started);
+ }
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+
+ return err;
+}
+
+/**
+ * damon_start() - Starts the monitorings for a given group of contexts.
+ * @ctxs: an array of the pointers for contexts to start monitoring
+ * @nr_ctxs: size of @ctxs
+ * @exclusive: exclusiveness of this contexts group
+ *
+ * This function starts a group of monitoring threads for a group of monitoring
+ * contexts. One thread per each context is created and run in parallel. The
+ * caller should handle synchronization between the threads by itself. If
+ * @exclusive is true and a group of threads that created by other
+ * 'damon_start()' call is currently running, this function does nothing but
+ * returns -EBUSY.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
+{
+ int i;
+ int err = 0;
+
+ mutex_lock(&damon_lock);
+ if ((exclusive && nr_running_ctxs) ||
+ (!exclusive && running_exclusive_ctxs)) {
+ mutex_unlock(&damon_lock);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < nr_ctxs; i++) {
+ err = __damon_start(ctxs[i]);
+ if (err)
+ break;
+ nr_running_ctxs++;
+ }
+ if (exclusive && nr_running_ctxs)
+ running_exclusive_ctxs = true;
+ mutex_unlock(&damon_lock);
+
+ return err;
+}
+
+/*
+ * __damon_stop() - Stops monitoring of a given context.
+ * @ctx: monitoring context
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int __damon_stop(struct damon_ctx *ctx)
+{
+ struct task_struct *tsk;
+
+ mutex_lock(&ctx->kdamond_lock);
+ tsk = ctx->kdamond;
+ if (tsk) {
+ get_task_struct(tsk);
+ mutex_unlock(&ctx->kdamond_lock);
+ kthread_stop_put(tsk);
+ return 0;
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+
+ return -EPERM;
+}
+
+/**
+ * damon_stop() - Stops the monitorings for a given group of contexts.
+ * @ctxs: an array of the pointers for contexts to stop monitoring
+ * @nr_ctxs: size of @ctxs
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
+{
+ int i, err = 0;
+
+ for (i = 0; i < nr_ctxs; i++) {
+ /* nr_running_ctxs is decremented in kdamond_fn */
+ err = __damon_stop(ctxs[i]);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+/**
+ * damon_is_running() - Returns if a given DAMON context is running.
+ * @ctx: The DAMON context to see if running.
+ *
+ * Return: true if @ctx is running, false otherwise.
+ */
+bool damon_is_running(struct damon_ctx *ctx)
+{
+ bool running;
+
+ mutex_lock(&ctx->kdamond_lock);
+ running = ctx->kdamond != NULL;
+ mutex_unlock(&ctx->kdamond_lock);
+ return running;
+}
+
+/**
+ * damon_call() - Invoke a given function on DAMON worker thread (kdamond).
+ * @ctx: DAMON context to call the function for.
+ * @control: Control variable of the call request.
+ *
+ * Ask DAMON worker thread (kdamond) of @ctx to call a function with an
+ * argument data that respectively passed via &damon_call_control->fn and
+ * &damon_call_control->data of @control. If &damon_call_control->repeat of
+ * @control is unset, further wait until the kdamond finishes handling of the
+ * request. Otherwise, return as soon as the request is made.
+ *
+ * The kdamond executes the function with the argument in the main loop, just
+ * after a sampling of the iteration is finished. The function can hence
+ * safely access the internal data of the &struct damon_ctx without additional
+ * synchronization. The return value of the function will be saved in
+ * &damon_call_control->return_code.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
+{
+ if (!control->repeat)
+ init_completion(&control->completion);
+ control->canceled = false;
+ INIT_LIST_HEAD(&control->list);
+
+ mutex_lock(&ctx->call_controls_lock);
+ list_add_tail(&control->list, &ctx->call_controls);
+ mutex_unlock(&ctx->call_controls_lock);
+ if (!damon_is_running(ctx))
+ return -EINVAL;
+ if (control->repeat)
+ return 0;
+ wait_for_completion(&control->completion);
+ if (control->canceled)
+ return -ECANCELED;
+ return 0;
+}
+
+/**
+ * damos_walk() - Invoke a given functions while DAMOS walk regions.
+ * @ctx: DAMON context to call the functions for.
+ * @control: Control variable of the walk request.
+ *
+ * Ask DAMON worker thread (kdamond) of @ctx to call a function for each region
+ * that the kdamond will apply DAMOS action to, and wait until the kdamond
+ * finishes handling of the request.
+ *
+ * The kdamond executes the given function in the main loop, for each region
+ * just after it applied any DAMOS actions of @ctx to it. The invocation is
+ * made only within one &damos->apply_interval_us since damos_walk()
+ * invocation, for each scheme. The given callback function can hence safely
+ * access the internal data of &struct damon_ctx and &struct damon_region that
+ * each of the scheme will apply the action for next interval, without
+ * additional synchronizations against the kdamond. If every scheme of @ctx
+ * passed at least one &damos->apply_interval_us, kdamond marks the request as
+ * completed so that damos_walk() can wakeup and return.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control)
+{
+ init_completion(&control->completion);
+ control->canceled = false;
+ mutex_lock(&ctx->walk_control_lock);
+ if (ctx->walk_control) {
+ mutex_unlock(&ctx->walk_control_lock);
+ return -EBUSY;
+ }
+ ctx->walk_control = control;
+ mutex_unlock(&ctx->walk_control_lock);
+ if (!damon_is_running(ctx))
+ return -EINVAL;
+ wait_for_completion(&control->completion);
+ if (control->canceled)
+ return -ECANCELED;
+ return 0;
+}
+
+/*
+ * Warn and fix corrupted ->nr_accesses[_bp] for investigations and preventing
+ * the problem being propagated.
+ */
+static void damon_warn_fix_nr_accesses_corruption(struct damon_region *r)
+{
+ if (r->nr_accesses_bp == r->nr_accesses * 10000)
+ return;
+ WARN_ONCE(true, "invalid nr_accesses_bp at reset: %u %u\n",
+ r->nr_accesses_bp, r->nr_accesses);
+ r->nr_accesses_bp = r->nr_accesses * 10000;
+}
+
+/*
+ * Reset the aggregated monitoring results ('nr_accesses' of each region).
+ */
+static void kdamond_reset_aggregated(struct damon_ctx *c)
+{
+ struct damon_target *t;
+ unsigned int ti = 0; /* target's index */
+
+ damon_for_each_target(t, c) {
+ struct damon_region *r;
+
+ damon_for_each_region(r, t) {
+ trace_damon_aggregated(ti, r, damon_nr_regions(t));
+ damon_warn_fix_nr_accesses_corruption(r);
+ r->last_nr_accesses = r->nr_accesses;
+ r->nr_accesses = 0;
+ }
+ ti++;
+ }
+}
+
+static unsigned long damon_get_intervals_score(struct damon_ctx *c)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned long sz_region, max_access_events = 0, access_events = 0;
+ unsigned long target_access_events;
+ unsigned long goal_bp = c->attrs.intervals_goal.access_bp;
+
+ damon_for_each_target(t, c) {
+ damon_for_each_region(r, t) {
+ sz_region = damon_sz_region(r);
+ max_access_events += sz_region * c->attrs.aggr_samples;
+ access_events += sz_region * r->nr_accesses;
+ }
+ }
+ target_access_events = max_access_events * goal_bp / 10000;
+ target_access_events = target_access_events ? : 1;
+ return access_events * 10000 / target_access_events;
+}
+
+static unsigned long damon_feed_loop_next_input(unsigned long last_input,
+ unsigned long score);
+
+static unsigned long damon_get_intervals_adaptation_bp(struct damon_ctx *c)
+{
+ unsigned long score_bp, adaptation_bp;
+
+ score_bp = damon_get_intervals_score(c);
+ adaptation_bp = damon_feed_loop_next_input(100000000, score_bp) /
+ 10000;
+ /*
+ * adaptaion_bp ranges from 1 to 20,000. Avoid too rapid reduction of
+ * the intervals by rescaling [1,10,000] to [5000, 10,000].
+ */
+ if (adaptation_bp <= 10000)
+ adaptation_bp = 5000 + adaptation_bp / 2;
+ return adaptation_bp;
+}
+
+static void kdamond_tune_intervals(struct damon_ctx *c)
+{
+ unsigned long adaptation_bp;
+ struct damon_attrs new_attrs;
+ struct damon_intervals_goal *goal;
+
+ adaptation_bp = damon_get_intervals_adaptation_bp(c);
+ if (adaptation_bp == 10000)
+ return;
+
+ new_attrs = c->attrs;
+ goal = &c->attrs.intervals_goal;
+ new_attrs.sample_interval = min(goal->max_sample_us,
+ c->attrs.sample_interval * adaptation_bp / 10000);
+ new_attrs.sample_interval = max(goal->min_sample_us,
+ new_attrs.sample_interval);
+ new_attrs.aggr_interval = new_attrs.sample_interval *
+ c->attrs.aggr_samples;
+ trace_damon_monitor_intervals_tune(new_attrs.sample_interval);
+ damon_set_attrs(c, &new_attrs);
+}
+
+static void damon_split_region_at(struct damon_target *t,
+ struct damon_region *r, unsigned long sz_r);
+
+static bool __damos_valid_target(struct damon_region *r, struct damos *s)
+{
+ unsigned long sz;
+ unsigned int nr_accesses = r->nr_accesses_bp / 10000;
+
+ sz = damon_sz_region(r);
+ return s->pattern.min_sz_region <= sz &&
+ sz <= s->pattern.max_sz_region &&
+ s->pattern.min_nr_accesses <= nr_accesses &&
+ nr_accesses <= s->pattern.max_nr_accesses &&
+ s->pattern.min_age_region <= r->age &&
+ r->age <= s->pattern.max_age_region;
+}
+
+static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
+ struct damon_region *r, struct damos *s)
+{
+ bool ret = __damos_valid_target(r, s);
+
+ if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
+ return ret;
+
+ return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
+}
+
+/*
+ * damos_skip_charged_region() - Check if the given region or starting part of
+ * it is already charged for the DAMOS quota.
+ * @t: The target of the region.
+ * @rp: The pointer to the region.
+ * @s: The scheme to be applied.
+ * @min_sz_region: minimum region size.
+ *
+ * If a quota of a scheme has exceeded in a quota charge window, the scheme's
+ * action would applied to only a part of the target access pattern fulfilling
+ * regions. To avoid applying the scheme action to only already applied
+ * regions, DAMON skips applying the scheme action to the regions that charged
+ * in the previous charge window.
+ *
+ * This function checks if a given region should be skipped or not for the
+ * reason. If only the starting part of the region has previously charged,
+ * this function splits the region into two so that the second one covers the
+ * area that not charged in the previous charge widnow and saves the second
+ * region in *rp and returns false, so that the caller can apply DAMON action
+ * to the second one.
+ *
+ * Return: true if the region should be entirely skipped, false otherwise.
+ */
+static bool damos_skip_charged_region(struct damon_target *t,
+ struct damon_region **rp, struct damos *s, unsigned long min_sz_region)
+{
+ struct damon_region *r = *rp;
+ struct damos_quota *quota = &s->quota;
+ unsigned long sz_to_skip;
+
+ /* Skip previously charged regions */
+ if (quota->charge_target_from) {
+ if (t != quota->charge_target_from)
+ return true;
+ if (r == damon_last_region(t)) {
+ quota->charge_target_from = NULL;
+ quota->charge_addr_from = 0;
+ return true;
+ }
+ if (quota->charge_addr_from &&
+ r->ar.end <= quota->charge_addr_from)
+ return true;
+
+ if (quota->charge_addr_from && r->ar.start <
+ quota->charge_addr_from) {
+ sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
+ r->ar.start, min_sz_region);
+ if (!sz_to_skip) {
+ if (damon_sz_region(r) <= min_sz_region)
+ return true;
+ sz_to_skip = min_sz_region;
+ }
+ damon_split_region_at(t, r, sz_to_skip);
+ r = damon_next_region(r);
+ *rp = r;
+ }
+ quota->charge_target_from = NULL;
+ quota->charge_addr_from = 0;
+ }
+ return false;
+}
+
+static void damos_update_stat(struct damos *s,
+ unsigned long sz_tried, unsigned long sz_applied,
+ unsigned long sz_ops_filter_passed)
+{
+ s->stat.nr_tried++;
+ s->stat.sz_tried += sz_tried;
+ if (sz_applied)
+ s->stat.nr_applied++;
+ s->stat.sz_applied += sz_applied;
+ s->stat.sz_ops_filter_passed += sz_ops_filter_passed;
+}
+
+static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t,
+ struct damon_region *r, struct damos_filter *filter,
+ unsigned long min_sz_region)
+{
+ bool matched = false;
+ struct damon_target *ti;
+ int target_idx = 0;
+ unsigned long start, end;
+
+ switch (filter->type) {
+ case DAMOS_FILTER_TYPE_TARGET:
+ damon_for_each_target(ti, ctx) {
+ if (ti == t)
+ break;
+ target_idx++;
+ }
+ matched = target_idx == filter->target_idx;
+ break;
+ case DAMOS_FILTER_TYPE_ADDR:
+ start = ALIGN_DOWN(filter->addr_range.start, min_sz_region);
+ end = ALIGN_DOWN(filter->addr_range.end, min_sz_region);
+
+ /* inside the range */
+ if (start <= r->ar.start && r->ar.end <= end) {
+ matched = true;
+ break;
+ }
+ /* outside of the range */
+ if (r->ar.end <= start || end <= r->ar.start) {
+ matched = false;
+ break;
+ }
+ /* start before the range and overlap */
+ if (r->ar.start < start) {
+ damon_split_region_at(t, r, start - r->ar.start);
+ matched = false;
+ break;
+ }
+ /* start inside the range */
+ damon_split_region_at(t, r, end - r->ar.start);
+ matched = true;
+ break;
+ default:
+ return false;
+ }
+
+ return matched == filter->matching;
+}
+
+static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
+ struct damon_region *r, struct damos *s)
+{
+ struct damos_filter *filter;
+
+ s->core_filters_allowed = false;
+ damos_for_each_core_filter(filter, s) {
+ if (damos_filter_match(ctx, t, r, filter, ctx->min_sz_region)) {
+ if (filter->allow)
+ s->core_filters_allowed = true;
+ return !filter->allow;
+ }
+ }
+ return s->core_filters_default_reject;
+}
+
+/*
+ * damos_walk_call_walk() - Call &damos_walk_control->walk_fn.
+ * @ctx: The context of &damon_ctx->walk_control.
+ * @t: The monitoring target of @r that @s will be applied.
+ * @r: The region of @t that @s will be applied.
+ * @s: The scheme of @ctx that will be applied to @r.
+ *
+ * This function is called from kdamond whenever it asked the operation set to
+ * apply a DAMOS scheme action to a region. If a DAMOS walk request is
+ * installed by damos_walk() and not yet uninstalled, invoke it.
+ */
+static void damos_walk_call_walk(struct damon_ctx *ctx, struct damon_target *t,
+ struct damon_region *r, struct damos *s,
+ unsigned long sz_filter_passed)
+{
+ struct damos_walk_control *control;
+
+ if (s->walk_completed)
+ return;
+
+ control = ctx->walk_control;
+ if (!control)
+ return;
+
+ control->walk_fn(control->data, ctx, t, r, s, sz_filter_passed);
+}
+
+/*
+ * damos_walk_complete() - Complete DAMOS walk request if all walks are done.
+ * @ctx: The context of &damon_ctx->walk_control.
+ * @s: A scheme of @ctx that all walks are now done.
+ *
+ * This function is called when kdamond finished applying the action of a DAMOS
+ * scheme to all regions that eligible for the given &damos->apply_interval_us.
+ * If every scheme of @ctx including @s now finished walking for at least one
+ * &damos->apply_interval_us, this function makrs the handling of the given
+ * DAMOS walk request is done, so that damos_walk() can wake up and return.
+ */
+static void damos_walk_complete(struct damon_ctx *ctx, struct damos *s)
+{
+ struct damos *siter;
+ struct damos_walk_control *control;
+
+ control = ctx->walk_control;
+ if (!control)
+ return;
+
+ s->walk_completed = true;
+ /* if all schemes completed, signal completion to walker */
+ damon_for_each_scheme(siter, ctx) {
+ if (!siter->walk_completed)
+ return;
+ }
+ damon_for_each_scheme(siter, ctx)
+ siter->walk_completed = false;
+
+ complete(&control->completion);
+ ctx->walk_control = NULL;
+}
+
+/*
+ * damos_walk_cancel() - Cancel the current DAMOS walk request.
+ * @ctx: The context of &damon_ctx->walk_control.
+ *
+ * This function is called when @ctx is deactivated by DAMOS watermarks, DAMOS
+ * walk is requested but there is no DAMOS scheme to walk for, or the kdamond
+ * is already out of the main loop and therefore gonna be terminated, and hence
+ * cannot continue the walks. This function therefore marks the walk request
+ * as canceled, so that damos_walk() can wake up and return.
+ */
+static void damos_walk_cancel(struct damon_ctx *ctx)
+{
+ struct damos_walk_control *control;
+
+ mutex_lock(&ctx->walk_control_lock);
+ control = ctx->walk_control;
+ mutex_unlock(&ctx->walk_control_lock);
+
+ if (!control)
+ return;
+ control->canceled = true;
+ complete(&control->completion);
+ mutex_lock(&ctx->walk_control_lock);
+ ctx->walk_control = NULL;
+ mutex_unlock(&ctx->walk_control_lock);
+}
+
+static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
+ struct damon_region *r, struct damos *s)
+{
+ struct damos_quota *quota = &s->quota;
+ unsigned long sz = damon_sz_region(r);
+ struct timespec64 begin, end;
+ unsigned long sz_applied = 0;
+ unsigned long sz_ops_filter_passed = 0;
+ /*
+ * We plan to support multiple context per kdamond, as DAMON sysfs
+ * implies with 'nr_contexts' file. Nevertheless, only single context
+ * per kdamond is supported for now. So, we can simply use '0' context
+ * index here.
+ */
+ unsigned int cidx = 0;
+ struct damos *siter; /* schemes iterator */
+ unsigned int sidx = 0;
+ struct damon_target *titer; /* targets iterator */
+ unsigned int tidx = 0;
+ bool do_trace = false;
+
+ /* get indices for trace_damos_before_apply() */
+ if (trace_damos_before_apply_enabled()) {
+ damon_for_each_scheme(siter, c) {
+ if (siter == s)
+ break;
+ sidx++;
+ }
+ damon_for_each_target(titer, c) {
+ if (titer == t)
+ break;
+ tidx++;
+ }
+ do_trace = true;
+ }
+
+ if (c->ops.apply_scheme) {
+ if (quota->esz && quota->charged_sz + sz > quota->esz) {
+ sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
+ c->min_sz_region);
+ if (!sz)
+ goto update_stat;
+ damon_split_region_at(t, r, sz);
+ }
+ if (damos_filter_out(c, t, r, s))
+ return;
+ ktime_get_coarse_ts64(&begin);
+ trace_damos_before_apply(cidx, sidx, tidx, r,
+ damon_nr_regions(t), do_trace);
+ sz_applied = c->ops.apply_scheme(c, t, r, s,
+ &sz_ops_filter_passed);
+ damos_walk_call_walk(c, t, r, s, sz_ops_filter_passed);
+ ktime_get_coarse_ts64(&end);
+ quota->total_charged_ns += timespec64_to_ns(&end) -
+ timespec64_to_ns(&begin);
+ quota->charged_sz += sz;
+ if (quota->esz && quota->charged_sz >= quota->esz) {
+ quota->charge_target_from = t;
+ quota->charge_addr_from = r->ar.end + 1;
+ }
+ }
+ if (s->action != DAMOS_STAT)
+ r->age = 0;
+
+update_stat:
+ damos_update_stat(s, sz, sz_applied, sz_ops_filter_passed);
+}
+
+static void damon_do_apply_schemes(struct damon_ctx *c,
+ struct damon_target *t,
+ struct damon_region *r)
+{
+ struct damos *s;
+
+ damon_for_each_scheme(s, c) {
+ struct damos_quota *quota = &s->quota;
+
+ if (c->passed_sample_intervals < s->next_apply_sis)
+ continue;
+
+ if (!s->wmarks.activated)
+ continue;
+
+ /* Check the quota */
+ if (quota->esz && quota->charged_sz >= quota->esz)
+ continue;
+
+ if (damos_skip_charged_region(t, &r, s, c->min_sz_region))
+ continue;
+
+ if (!damos_valid_target(c, t, r, s))
+ continue;
+
+ damos_apply_scheme(c, t, r, s);
+ }
+}
+
+/*
+ * damon_feed_loop_next_input() - get next input to achieve a target score.
+ * @last_input The last input.
+ * @score Current score that made with @last_input.
+ *
+ * Calculate next input to achieve the target score, based on the last input
+ * and current score. Assuming the input and the score are positively
+ * proportional, calculate how much compensation should be added to or
+ * subtracted from the last input as a proportion of the last input. Avoid
+ * next input always being zero by setting it non-zero always. In short form
+ * (assuming support of float and signed calculations), the algorithm is as
+ * below.
+ *
+ * next_input = max(last_input * ((goal - current) / goal + 1), 1)
+ *
+ * For simple implementation, we assume the target score is always 10,000. The
+ * caller should adjust @score for this.
+ *
+ * Returns next input that assumed to achieve the target score.
+ */
+static unsigned long damon_feed_loop_next_input(unsigned long last_input,
+ unsigned long score)
+{
+ const unsigned long goal = 10000;
+ /* Set minimum input as 10000 to avoid compensation be zero */
+ const unsigned long min_input = 10000;
+ unsigned long score_goal_diff, compensation;
+ bool over_achieving = score > goal;
+
+ if (score == goal)
+ return last_input;
+ if (score >= goal * 2)
+ return min_input;
+
+ if (over_achieving)
+ score_goal_diff = score - goal;
+ else
+ score_goal_diff = goal - score;
+
+ if (last_input < ULONG_MAX / score_goal_diff)
+ compensation = last_input * score_goal_diff / goal;
+ else
+ compensation = last_input / goal * score_goal_diff;
+
+ if (over_achieving)
+ return max(last_input - compensation, min_input);
+ if (last_input < ULONG_MAX - compensation)
+ return last_input + compensation;
+ return ULONG_MAX;
+}
+
+#ifdef CONFIG_PSI
+
+static u64 damos_get_some_mem_psi_total(void)
+{
+ if (static_branch_likely(&psi_disabled))
+ return 0;
+ return div_u64(psi_system.total[PSI_AVGS][PSI_MEM * 2],
+ NSEC_PER_USEC);
+}
+
+#else /* CONFIG_PSI */
+
+static inline u64 damos_get_some_mem_psi_total(void)
+{
+ return 0;
+};
+
+#endif /* CONFIG_PSI */
+
+#ifdef CONFIG_NUMA
+static __kernel_ulong_t damos_get_node_mem_bp(
+ struct damos_quota_goal *goal)
+{
+ struct sysinfo i;
+ __kernel_ulong_t numerator;
+
+ si_meminfo_node(&i, goal->nid);
+ if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP)
+ numerator = i.totalram - i.freeram;
+ else /* DAMOS_QUOTA_NODE_MEM_FREE_BP */
+ numerator = i.freeram;
+ return numerator * 10000 / i.totalram;
+}
+
+static unsigned long damos_get_node_memcg_used_bp(
+ struct damos_quota_goal *goal)
+{
+ struct mem_cgroup *memcg;
+ struct lruvec *lruvec;
+ unsigned long used_pages, numerator;
+ struct sysinfo i;
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_id(goal->memcg_id);
+ rcu_read_unlock();
+ if (!memcg) {
+ if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP)
+ return 0;
+ else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */
+ return 10000;
+ }
+ mem_cgroup_flush_stats(memcg);
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(goal->nid));
+ used_pages = lruvec_page_state(lruvec, NR_ACTIVE_ANON);
+ used_pages += lruvec_page_state(lruvec, NR_INACTIVE_ANON);
+ used_pages += lruvec_page_state(lruvec, NR_ACTIVE_FILE);
+ used_pages += lruvec_page_state(lruvec, NR_INACTIVE_FILE);
+
+ si_meminfo_node(&i, goal->nid);
+ if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP)
+ numerator = used_pages;
+ else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */
+ numerator = i.totalram - used_pages;
+ return numerator * 10000 / i.totalram;
+}
+#else
+static __kernel_ulong_t damos_get_node_mem_bp(
+ struct damos_quota_goal *goal)
+{
+ return 0;
+}
+
+static unsigned long damos_get_node_memcg_used_bp(
+ struct damos_quota_goal *goal)
+{
+ return 0;
+}
+#endif
+
+
+static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal)
+{
+ u64 now_psi_total;
+
+ switch (goal->metric) {
+ case DAMOS_QUOTA_USER_INPUT:
+ /* User should already set goal->current_value */
+ break;
+ case DAMOS_QUOTA_SOME_MEM_PSI_US:
+ now_psi_total = damos_get_some_mem_psi_total();
+ goal->current_value = now_psi_total - goal->last_psi_total;
+ goal->last_psi_total = now_psi_total;
+ break;
+ case DAMOS_QUOTA_NODE_MEM_USED_BP:
+ case DAMOS_QUOTA_NODE_MEM_FREE_BP:
+ goal->current_value = damos_get_node_mem_bp(goal);
+ break;
+ case DAMOS_QUOTA_NODE_MEMCG_USED_BP:
+ case DAMOS_QUOTA_NODE_MEMCG_FREE_BP:
+ goal->current_value = damos_get_node_memcg_used_bp(goal);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Return the highest score since it makes schemes least aggressive */
+static unsigned long damos_quota_score(struct damos_quota *quota)
+{
+ struct damos_quota_goal *goal;
+ unsigned long highest_score = 0;
+
+ damos_for_each_quota_goal(goal, quota) {
+ damos_set_quota_goal_current_value(goal);
+ highest_score = max(highest_score,
+ goal->current_value * 10000 /
+ goal->target_value);
+ }
+
+ return highest_score;
+}
+
+/*
+ * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty
+ */
+static void damos_set_effective_quota(struct damos_quota *quota)
+{
+ unsigned long throughput;
+ unsigned long esz = ULONG_MAX;
+
+ if (!quota->ms && list_empty(&quota->goals)) {
+ quota->esz = quota->sz;
+ return;
+ }
+
+ if (!list_empty(&quota->goals)) {
+ unsigned long score = damos_quota_score(quota);
+
+ quota->esz_bp = damon_feed_loop_next_input(
+ max(quota->esz_bp, 10000UL),
+ score);
+ esz = quota->esz_bp / 10000;
+ }
+
+ if (quota->ms) {
+ if (quota->total_charged_ns)
+ throughput = mult_frac(quota->total_charged_sz, 1000000,
+ quota->total_charged_ns);
+ else
+ throughput = PAGE_SIZE * 1024;
+ esz = min(throughput * quota->ms, esz);
+ }
+
+ if (quota->sz && quota->sz < esz)
+ esz = quota->sz;
+
+ quota->esz = esz;
+}
+
+static void damos_trace_esz(struct damon_ctx *c, struct damos *s,
+ struct damos_quota *quota)
+{
+ unsigned int cidx = 0, sidx = 0;
+ struct damos *siter;
+
+ damon_for_each_scheme(siter, c) {
+ if (siter == s)
+ break;
+ sidx++;
+ }
+ trace_damos_esz(cidx, sidx, quota->esz);
+}
+
+static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
+{
+ struct damos_quota *quota = &s->quota;
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned long cumulated_sz, cached_esz;
+ unsigned int score, max_score = 0;
+
+ if (!quota->ms && !quota->sz && list_empty(&quota->goals))
+ return;
+
+ /* First charge window */
+ if (!quota->total_charged_sz && !quota->charged_from) {
+ quota->charged_from = jiffies;
+ damos_set_effective_quota(quota);
+ }
+
+ /* New charge window starts */
+ if (time_after_eq(jiffies, quota->charged_from +
+ msecs_to_jiffies(quota->reset_interval))) {
+ if (quota->esz && quota->charged_sz >= quota->esz)
+ s->stat.qt_exceeds++;
+ quota->total_charged_sz += quota->charged_sz;
+ quota->charged_from = jiffies;
+ quota->charged_sz = 0;
+ if (trace_damos_esz_enabled())
+ cached_esz = quota->esz;
+ damos_set_effective_quota(quota);
+ if (trace_damos_esz_enabled() && quota->esz != cached_esz)
+ damos_trace_esz(c, s, quota);
+ }
+
+ if (!c->ops.get_scheme_score)
+ return;
+
+ /* Fill up the score histogram */
+ memset(c->regions_score_histogram, 0,
+ sizeof(*c->regions_score_histogram) *
+ (DAMOS_MAX_SCORE + 1));
+ damon_for_each_target(t, c) {
+ damon_for_each_region(r, t) {
+ if (!__damos_valid_target(r, s))
+ continue;
+ score = c->ops.get_scheme_score(c, t, r, s);
+ c->regions_score_histogram[score] +=
+ damon_sz_region(r);
+ if (score > max_score)
+ max_score = score;
+ }
+ }
+
+ /* Set the min score limit */
+ for (cumulated_sz = 0, score = max_score; ; score--) {
+ cumulated_sz += c->regions_score_histogram[score];
+ if (cumulated_sz >= quota->esz || !score)
+ break;
+ }
+ quota->min_score = score;
+}
+
+static void kdamond_apply_schemes(struct damon_ctx *c)
+{
+ struct damon_target *t;
+ struct damon_region *r, *next_r;
+ struct damos *s;
+ unsigned long sample_interval = c->attrs.sample_interval ?
+ c->attrs.sample_interval : 1;
+ bool has_schemes_to_apply = false;
+
+ damon_for_each_scheme(s, c) {
+ if (c->passed_sample_intervals < s->next_apply_sis)
+ continue;
+
+ if (!s->wmarks.activated)
+ continue;
+
+ has_schemes_to_apply = true;
+
+ damos_adjust_quota(c, s);
+ }
+
+ if (!has_schemes_to_apply)
+ return;
+
+ mutex_lock(&c->walk_control_lock);
+ damon_for_each_target(t, c) {
+ damon_for_each_region_safe(r, next_r, t)
+ damon_do_apply_schemes(c, t, r);
+ }
+
+ damon_for_each_scheme(s, c) {
+ if (c->passed_sample_intervals < s->next_apply_sis)
+ continue;
+ damos_walk_complete(c, s);
+ s->next_apply_sis = c->passed_sample_intervals +
+ (s->apply_interval_us ? s->apply_interval_us :
+ c->attrs.aggr_interval) / sample_interval;
+ s->last_applied = NULL;
+ }
+ mutex_unlock(&c->walk_control_lock);
+}
+
+/*
+ * Merge two adjacent regions into one region
+ */
+static void damon_merge_two_regions(struct damon_target *t,
+ struct damon_region *l, struct damon_region *r)
+{
+ unsigned long sz_l = damon_sz_region(l), sz_r = damon_sz_region(r);
+
+ l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
+ (sz_l + sz_r);
+ l->nr_accesses_bp = l->nr_accesses * 10000;
+ l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
+ l->ar.end = r->ar.end;
+ damon_destroy_region(r, t);
+}
+
+/*
+ * Merge adjacent regions having similar access frequencies
+ *
+ * t target affected by this merge operation
+ * thres '->nr_accesses' diff threshold for the merge
+ * sz_limit size upper limit of each region
+ */
+static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
+ unsigned long sz_limit)
+{
+ struct damon_region *r, *prev = NULL, *next;
+
+ damon_for_each_region_safe(r, next, t) {
+ if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
+ r->age = 0;
+ else if ((r->nr_accesses == 0) != (r->last_nr_accesses == 0))
+ r->age = 0;
+ else
+ r->age++;
+
+ if (prev && prev->ar.end == r->ar.start &&
+ abs(prev->nr_accesses - r->nr_accesses) <= thres &&
+ damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
+ damon_merge_two_regions(t, prev, r);
+ else
+ prev = r;
+ }
+}
+
+/*
+ * Merge adjacent regions having similar access frequencies
+ *
+ * threshold '->nr_accesses' diff threshold for the merge
+ * sz_limit size upper limit of each region
+ *
+ * This function merges monitoring target regions which are adjacent and their
+ * access frequencies are similar. This is for minimizing the monitoring
+ * overhead under the dynamically changeable access pattern. If a merge was
+ * unnecessarily made, later 'kdamond_split_regions()' will revert it.
+ *
+ * The total number of regions could be higher than the user-defined limit,
+ * max_nr_regions for some cases. For example, the user can update
+ * max_nr_regions to a number that lower than the current number of regions
+ * while DAMON is running. For such a case, repeat merging until the limit is
+ * met while increasing @threshold up to possible maximum level.
+ */
+static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
+ unsigned long sz_limit)
+{
+ struct damon_target *t;
+ unsigned int nr_regions;
+ unsigned int max_thres;
+
+ max_thres = c->attrs.aggr_interval /
+ (c->attrs.sample_interval ? c->attrs.sample_interval : 1);
+ do {
+ nr_regions = 0;
+ damon_for_each_target(t, c) {
+ damon_merge_regions_of(t, threshold, sz_limit);
+ nr_regions += damon_nr_regions(t);
+ }
+ threshold = max(1, threshold * 2);
+ } while (nr_regions > c->attrs.max_nr_regions &&
+ threshold / 2 < max_thres);
+}
+
+/*
+ * Split a region in two
+ *
+ * r the region to be split
+ * sz_r size of the first sub-region that will be made
+ */
+static void damon_split_region_at(struct damon_target *t,
+ struct damon_region *r, unsigned long sz_r)
+{
+ struct damon_region *new;
+
+ new = damon_new_region(r->ar.start + sz_r, r->ar.end);
+ if (!new)
+ return;
+
+ r->ar.end = new->ar.start;
+
+ new->age = r->age;
+ new->last_nr_accesses = r->last_nr_accesses;
+ new->nr_accesses_bp = r->nr_accesses_bp;
+ new->nr_accesses = r->nr_accesses;
+
+ damon_insert_region(new, r, damon_next_region(r), t);
+}
+
+/* Split every region in the given target into 'nr_subs' regions */
+static void damon_split_regions_of(struct damon_target *t, int nr_subs,
+ unsigned long min_sz_region)
+{
+ struct damon_region *r, *next;
+ unsigned long sz_region, sz_sub = 0;
+ int i;
+
+ damon_for_each_region_safe(r, next, t) {
+ sz_region = damon_sz_region(r);
+
+ for (i = 0; i < nr_subs - 1 &&
+ sz_region > 2 * min_sz_region; i++) {
+ /*
+ * Randomly select size of left sub-region to be at
+ * least 10 percent and at most 90% of original region
+ */
+ sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
+ sz_region / 10, min_sz_region);
+ /* Do not allow blank region */
+ if (sz_sub == 0 || sz_sub >= sz_region)
+ continue;
+
+ damon_split_region_at(t, r, sz_sub);
+ sz_region = sz_sub;
+ }
+ }
+}
+
+/*
+ * Split every target region into randomly-sized small regions
+ *
+ * This function splits every target region into random-sized small regions if
+ * current total number of the regions is equal or smaller than half of the
+ * user-specified maximum number of regions. This is for maximizing the
+ * monitoring accuracy under the dynamically changeable access patterns. If a
+ * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
+ * it.
+ */
+static void kdamond_split_regions(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ unsigned int nr_regions = 0;
+ static unsigned int last_nr_regions;
+ int nr_subregions = 2;
+
+ damon_for_each_target(t, ctx)
+ nr_regions += damon_nr_regions(t);
+
+ if (nr_regions > ctx->attrs.max_nr_regions / 2)
+ return;
+
+ /* Maybe the middle of the region has different access frequency */
+ if (last_nr_regions == nr_regions &&
+ nr_regions < ctx->attrs.max_nr_regions / 3)
+ nr_subregions = 3;
+
+ damon_for_each_target(t, ctx)
+ damon_split_regions_of(t, nr_subregions, ctx->min_sz_region);
+
+ last_nr_regions = nr_regions;
+}
+
+/*
+ * Check whether current monitoring should be stopped
+ *
+ * The monitoring is stopped when either the user requested to stop, or all
+ * monitoring targets are invalid.
+ *
+ * Returns true if need to stop current monitoring.
+ */
+static bool kdamond_need_stop(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+
+ if (kthread_should_stop())
+ return true;
+
+ if (!ctx->ops.target_valid)
+ return false;
+
+ damon_for_each_target(t, ctx) {
+ if (ctx->ops.target_valid(t))
+ return false;
+ }
+
+ return true;
+}
+
+static int damos_get_wmark_metric_value(enum damos_wmark_metric metric,
+ unsigned long *metric_value)
+{
+ switch (metric) {
+ case DAMOS_WMARK_FREE_MEM_RATE:
+ *metric_value = global_zone_page_state(NR_FREE_PAGES) * 1000 /
+ totalram_pages();
+ return 0;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+/*
+ * Returns zero if the scheme is active. Else, returns time to wait for next
+ * watermark check in micro-seconds.
+ */
+static unsigned long damos_wmark_wait_us(struct damos *scheme)
+{
+ unsigned long metric;
+
+ if (damos_get_wmark_metric_value(scheme->wmarks.metric, &metric))
+ return 0;
+
+ /* higher than high watermark or lower than low watermark */
+ if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
+ if (scheme->wmarks.activated)
+ pr_debug("deactivate a scheme (%d) for %s wmark\n",
+ scheme->action,
+ str_high_low(metric > scheme->wmarks.high));
+ scheme->wmarks.activated = false;
+ return scheme->wmarks.interval;
+ }
+
+ /* inactive and higher than middle watermark */
+ if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
+ !scheme->wmarks.activated)
+ return scheme->wmarks.interval;
+
+ if (!scheme->wmarks.activated)
+ pr_debug("activate a scheme (%d)\n", scheme->action);
+ scheme->wmarks.activated = true;
+ return 0;
+}
+
+static void kdamond_usleep(unsigned long usecs)
+{
+ if (usecs >= USLEEP_RANGE_UPPER_BOUND)
+ schedule_timeout_idle(usecs_to_jiffies(usecs));
+ else
+ usleep_range_idle(usecs, usecs + 1);
+}
+
+/*
+ * kdamond_call() - handle damon_call_control objects.
+ * @ctx: The &struct damon_ctx of the kdamond.
+ * @cancel: Whether to cancel the invocation of the function.
+ *
+ * If there are &struct damon_call_control requests that registered via
+ * &damon_call() on @ctx, do or cancel the invocation of the function depending
+ * on @cancel. @cancel is set when the kdamond is already out of the main loop
+ * and therefore will be terminated.
+ */
+static void kdamond_call(struct damon_ctx *ctx, bool cancel)
+{
+ struct damon_call_control *control;
+ LIST_HEAD(repeat_controls);
+ int ret = 0;
+
+ while (true) {
+ mutex_lock(&ctx->call_controls_lock);
+ control = list_first_entry_or_null(&ctx->call_controls,
+ struct damon_call_control, list);
+ mutex_unlock(&ctx->call_controls_lock);
+ if (!control)
+ break;
+ if (cancel) {
+ control->canceled = true;
+ } else {
+ ret = control->fn(control->data);
+ control->return_code = ret;
+ }
+ mutex_lock(&ctx->call_controls_lock);
+ list_del(&control->list);
+ mutex_unlock(&ctx->call_controls_lock);
+ if (!control->repeat) {
+ complete(&control->completion);
+ } else if (control->canceled && control->dealloc_on_cancel) {
+ kfree(control);
+ continue;
+ } else {
+ list_add(&control->list, &repeat_controls);
+ }
+ }
+ control = list_first_entry_or_null(&repeat_controls,
+ struct damon_call_control, list);
+ if (!control || cancel)
+ return;
+ mutex_lock(&ctx->call_controls_lock);
+ list_add_tail(&control->list, &ctx->call_controls);
+ mutex_unlock(&ctx->call_controls_lock);
+}
+
+/* Returns negative error code if it's not activated but should return */
+static int kdamond_wait_activation(struct damon_ctx *ctx)
+{
+ struct damos *s;
+ unsigned long wait_time;
+ unsigned long min_wait_time = 0;
+ bool init_wait_time = false;
+
+ while (!kdamond_need_stop(ctx)) {
+ damon_for_each_scheme(s, ctx) {
+ wait_time = damos_wmark_wait_us(s);
+ if (!init_wait_time || wait_time < min_wait_time) {
+ init_wait_time = true;
+ min_wait_time = wait_time;
+ }
+ }
+ if (!min_wait_time)
+ return 0;
+
+ kdamond_usleep(min_wait_time);
+
+ kdamond_call(ctx, false);
+ damos_walk_cancel(ctx);
+ }
+ return -EBUSY;
+}
+
+static void kdamond_init_ctx(struct damon_ctx *ctx)
+{
+ unsigned long sample_interval = ctx->attrs.sample_interval ?
+ ctx->attrs.sample_interval : 1;
+ unsigned long apply_interval;
+ struct damos *scheme;
+
+ ctx->passed_sample_intervals = 0;
+ ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
+ ctx->next_ops_update_sis = ctx->attrs.ops_update_interval /
+ sample_interval;
+ ctx->next_intervals_tune_sis = ctx->next_aggregation_sis *
+ ctx->attrs.intervals_goal.aggrs;
+
+ damon_for_each_scheme(scheme, ctx) {
+ apply_interval = scheme->apply_interval_us ?
+ scheme->apply_interval_us : ctx->attrs.aggr_interval;
+ scheme->next_apply_sis = apply_interval / sample_interval;
+ damos_set_filters_default_reject(scheme);
+ }
+}
+
+/*
+ * The monitoring daemon that runs as a kernel thread
+ */
+static int kdamond_fn(void *data)
+{
+ struct damon_ctx *ctx = data;
+ struct damon_target *t;
+ struct damon_region *r, *next;
+ unsigned int max_nr_accesses = 0;
+ unsigned long sz_limit = 0;
+
+ pr_debug("kdamond (%d) starts\n", current->pid);
+
+ complete(&ctx->kdamond_started);
+ kdamond_init_ctx(ctx);
+
+ if (ctx->ops.init)
+ ctx->ops.init(ctx);
+ ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1,
+ sizeof(*ctx->regions_score_histogram), GFP_KERNEL);
+ if (!ctx->regions_score_histogram)
+ goto done;
+
+ sz_limit = damon_region_sz_limit(ctx);
+
+ while (!kdamond_need_stop(ctx)) {
+ /*
+ * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could
+ * be changed from kdamond_call(). Read the values here, and
+ * use those for this iteration. That is, damon_set_attrs()
+ * updated new values are respected from next iteration.
+ */
+ unsigned long next_aggregation_sis = ctx->next_aggregation_sis;
+ unsigned long next_ops_update_sis = ctx->next_ops_update_sis;
+ unsigned long sample_interval = ctx->attrs.sample_interval;
+
+ if (kdamond_wait_activation(ctx))
+ break;
+
+ if (ctx->ops.prepare_access_checks)
+ ctx->ops.prepare_access_checks(ctx);
+
+ kdamond_usleep(sample_interval);
+ ctx->passed_sample_intervals++;
+
+ if (ctx->ops.check_accesses)
+ max_nr_accesses = ctx->ops.check_accesses(ctx);
+
+ if (ctx->passed_sample_intervals >= next_aggregation_sis)
+ kdamond_merge_regions(ctx,
+ max_nr_accesses / 10,
+ sz_limit);
+
+ /*
+ * do kdamond_call() and kdamond_apply_schemes() after
+ * kdamond_merge_regions() if possible, to reduce overhead
+ */
+ kdamond_call(ctx, false);
+ if (!list_empty(&ctx->schemes))
+ kdamond_apply_schemes(ctx);
+ else
+ damos_walk_cancel(ctx);
+
+ sample_interval = ctx->attrs.sample_interval ?
+ ctx->attrs.sample_interval : 1;
+ if (ctx->passed_sample_intervals >= next_aggregation_sis) {
+ if (ctx->attrs.intervals_goal.aggrs &&
+ ctx->passed_sample_intervals >=
+ ctx->next_intervals_tune_sis) {
+ /*
+ * ctx->next_aggregation_sis might be updated
+ * from kdamond_call(). In the case,
+ * damon_set_attrs() which will be called from
+ * kdamond_tune_interval() may wrongly think
+ * this is in the middle of the current
+ * aggregation, and make aggregation
+ * information reset for all regions. Then,
+ * following kdamond_reset_aggregated() call
+ * will make the region information invalid,
+ * particularly for ->nr_accesses_bp.
+ *
+ * Reset ->next_aggregation_sis to avoid that.
+ * It will anyway correctly updated after this
+ * if caluse.
+ */
+ ctx->next_aggregation_sis =
+ next_aggregation_sis;
+ ctx->next_intervals_tune_sis +=
+ ctx->attrs.aggr_samples *
+ ctx->attrs.intervals_goal.aggrs;
+ kdamond_tune_intervals(ctx);
+ sample_interval = ctx->attrs.sample_interval ?
+ ctx->attrs.sample_interval : 1;
+
+ }
+ ctx->next_aggregation_sis = next_aggregation_sis +
+ ctx->attrs.aggr_interval / sample_interval;
+
+ kdamond_reset_aggregated(ctx);
+ kdamond_split_regions(ctx);
+ }
+
+ if (ctx->passed_sample_intervals >= next_ops_update_sis) {
+ ctx->next_ops_update_sis = next_ops_update_sis +
+ ctx->attrs.ops_update_interval /
+ sample_interval;
+ if (ctx->ops.update)
+ ctx->ops.update(ctx);
+ sz_limit = damon_region_sz_limit(ctx);
+ }
+ }
+done:
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region_safe(r, next, t)
+ damon_destroy_region(r, t);
+ }
+
+ if (ctx->ops.cleanup)
+ ctx->ops.cleanup(ctx);
+ kfree(ctx->regions_score_histogram);
+
+ pr_debug("kdamond (%d) finishes\n", current->pid);
+ mutex_lock(&ctx->kdamond_lock);
+ ctx->kdamond = NULL;
+ mutex_unlock(&ctx->kdamond_lock);
+
+ kdamond_call(ctx, true);
+ damos_walk_cancel(ctx);
+
+ mutex_lock(&damon_lock);
+ nr_running_ctxs--;
+ if (!nr_running_ctxs && running_exclusive_ctxs)
+ running_exclusive_ctxs = false;
+ mutex_unlock(&damon_lock);
+
+ damon_destroy_targets(ctx);
+ return 0;
+}
+
+/*
+ * struct damon_system_ram_region - System RAM resource address region of
+ * [@start, @end).
+ * @start: Start address of the region (inclusive).
+ * @end: End address of the region (exclusive).
+ */
+struct damon_system_ram_region {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int walk_system_ram(struct resource *res, void *arg)
+{
+ struct damon_system_ram_region *a = arg;
+
+ if (a->end - a->start < resource_size(res)) {
+ a->start = res->start;
+ a->end = res->end;
+ }
+ return 0;
+}
+
+/*
+ * Find biggest 'System RAM' resource and store its start and end address in
+ * @start and @end, respectively. If no System RAM is found, returns false.
+ */
+static bool damon_find_biggest_system_ram(unsigned long *start,
+ unsigned long *end)
+
+{
+ struct damon_system_ram_region arg = {};
+
+ walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
+ if (arg.end <= arg.start)
+ return false;
+
+ *start = arg.start;
+ *end = arg.end;
+ return true;
+}
+
+/**
+ * damon_set_region_biggest_system_ram_default() - Set the region of the given
+ * monitoring target as requested, or biggest 'System RAM'.
+ * @t: The monitoring target to set the region.
+ * @start: The pointer to the start address of the region.
+ * @end: The pointer to the end address of the region.
+ * @min_sz_region: Minimum region size.
+ *
+ * This function sets the region of @t as requested by @start and @end. If the
+ * values of @start and @end are zero, however, this function finds the biggest
+ * 'System RAM' resource and sets the region to cover the resource. In the
+ * latter case, this function saves the start and end addresses of the resource
+ * in @start and @end, respectively.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_region_biggest_system_ram_default(struct damon_target *t,
+ unsigned long *start, unsigned long *end,
+ unsigned long min_sz_region)
+{
+ struct damon_addr_range addr_range;
+
+ if (*start > *end)
+ return -EINVAL;
+
+ if (!*start && !*end &&
+ !damon_find_biggest_system_ram(start, end))
+ return -EINVAL;
+
+ addr_range.start = *start;
+ addr_range.end = *end;
+ return damon_set_regions(t, &addr_range, 1, min_sz_region);
+}
+
+/*
+ * damon_moving_sum() - Calculate an inferred moving sum value.
+ * @mvsum: Inferred sum of the last @len_window values.
+ * @nomvsum: Non-moving sum of the last discrete @len_window window values.
+ * @len_window: The number of last values to take care of.
+ * @new_value: New value that will be added to the pseudo moving sum.
+ *
+ * Moving sum (moving average * window size) is good for handling noise, but
+ * the cost of keeping past values can be high for arbitrary window size. This
+ * function implements a lightweight pseudo moving sum function that doesn't
+ * keep the past window values.
+ *
+ * It simply assumes there was no noise in the past, and get the no-noise
+ * assumed past value to drop from @nomvsum and @len_window. @nomvsum is a
+ * non-moving sum of the last window. For example, if @len_window is 10 and we
+ * have 25 values, @nomvsum is the sum of the 11th to 20th values of the 25
+ * values. Hence, this function simply drops @nomvsum / @len_window from
+ * given @mvsum and add @new_value.
+ *
+ * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values for
+ * the last window could be vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20. For
+ * calculating next moving sum with a new value, we should drop 0 from 50 and
+ * add the new value. However, this function assumes it got value 5 for each
+ * of the last ten times. Based on the assumption, when the next value is
+ * measured, it drops the assumed past value, 5 from the current sum, and add
+ * the new value to get the updated pseduo-moving average.
+ *
+ * This means the value could have errors, but the errors will be disappeared
+ * for every @len_window aligned calls. For example, if @len_window is 10, the
+ * pseudo moving sum with 11th value to 19th value would have an error. But
+ * the sum with 20th value will not have the error.
+ *
+ * Return: Pseudo-moving average after getting the @new_value.
+ */
+static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
+ unsigned int len_window, unsigned int new_value)
+{
+ return mvsum - nomvsum / len_window + new_value;
+}
+
+/**
+ * damon_update_region_access_rate() - Update the access rate of a region.
+ * @r: The DAMON region to update for its access check result.
+ * @accessed: Whether the region has accessed during last sampling interval.
+ * @attrs: The damon_attrs of the DAMON context.
+ *
+ * Update the access rate of a region with the region's last sampling interval
+ * access check result.
+ *
+ * Usually this will be called by &damon_operations->check_accesses callback.
+ */
+void damon_update_region_access_rate(struct damon_region *r, bool accessed,
+ struct damon_attrs *attrs)
+{
+ unsigned int len_window = 1;
+
+ /*
+ * sample_interval can be zero, but cannot be larger than
+ * aggr_interval, owing to validation of damon_set_attrs().
+ */
+ if (attrs->sample_interval)
+ len_window = damon_max_nr_accesses(attrs);
+ r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp,
+ r->last_nr_accesses * 10000, len_window,
+ accessed ? 10000 : 0);
+
+ if (accessed)
+ r->nr_accesses++;
+}
+
+/**
+ * damon_initialized() - Return if DAMON is ready to be used.
+ *
+ * Return: true if DAMON is ready to be used, false otherwise.
+ */
+bool damon_initialized(void)
+{
+ return damon_region_cache != NULL;
+}
+
+static int __init damon_init(void)
+{
+ damon_region_cache = KMEM_CACHE(damon_region, 0);
+ if (unlikely(!damon_region_cache)) {
+ pr_err("creating damon_region_cache fails\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+subsys_initcall(damon_init);
+
+#include "tests/core-kunit.h"
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
new file mode 100644
index 000000000000..49b4bc294f4e
--- /dev/null
+++ b/mm/damon/lru_sort.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON-based LRU-lists Sorting
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-lru-sort: " fmt
+
+#include <linux/damon.h>
+#include <linux/kstrtox.h>
+#include <linux/module.h>
+
+#include "modules-common.h"
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_lru_sort."
+
+/*
+ * Enable or disable DAMON_LRU_SORT.
+ *
+ * You can enable DAMON_LRU_SORT by setting the value of this parameter as
+ * ``Y``. Setting it as ``N`` disables DAMON_LRU_SORT. Note that
+ * DAMON_LRU_SORT could do no real monitoring and LRU-lists sorting due to the
+ * watermarks-based activation condition. Refer to below descriptions for the
+ * watermarks parameter for this.
+ */
+static bool enabled __read_mostly;
+
+/*
+ * Make DAMON_LRU_SORT reads the input parameters again, except ``enabled``.
+ *
+ * Input parameters that updated while DAMON_LRU_SORT is running are not
+ * applied by default. Once this parameter is set as ``Y``, DAMON_LRU_SORT
+ * reads values of parametrs except ``enabled`` again. Once the re-reading is
+ * done, this parameter is set as ``N``. If invalid parameters are found while
+ * the re-reading, DAMON_LRU_SORT will be disabled.
+ */
+static bool commit_inputs __read_mostly;
+module_param(commit_inputs, bool, 0600);
+
+/*
+ * Access frequency threshold for hot memory regions identification in permil.
+ *
+ * If a memory region is accessed in frequency of this or higher,
+ * DAMON_LRU_SORT identifies the region as hot, and mark it as accessed on the
+ * LRU list, so that it could not be reclaimed under memory pressure. 50% by
+ * default.
+ */
+static unsigned long hot_thres_access_freq = 500;
+module_param(hot_thres_access_freq, ulong, 0600);
+
+/*
+ * Time threshold for cold memory regions identification in microseconds.
+ *
+ * If a memory region is not accessed for this or longer time, DAMON_LRU_SORT
+ * identifies the region as cold, and mark it as unaccessed on the LRU list, so
+ * that it could be reclaimed first under memory pressure. 120 seconds by
+ * default.
+ */
+static unsigned long cold_min_age __read_mostly = 120000000;
+module_param(cold_min_age, ulong, 0600);
+
+static struct damos_quota damon_lru_sort_quota = {
+ /* Use up to 10 ms per 1 sec, by default */
+ .ms = 10,
+ .sz = 0,
+ .reset_interval = 1000,
+ /* Within the quota, mark hotter regions accessed first. */
+ .weight_sz = 0,
+ .weight_nr_accesses = 1,
+ .weight_age = 0,
+};
+DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(damon_lru_sort_quota);
+
+static struct damos_watermarks damon_lru_sort_wmarks = {
+ .metric = DAMOS_WMARK_FREE_MEM_RATE,
+ .interval = 5000000, /* 5 seconds */
+ .high = 200, /* 20 percent */
+ .mid = 150, /* 15 percent */
+ .low = 50, /* 5 percent */
+};
+DEFINE_DAMON_MODULES_WMARKS_PARAMS(damon_lru_sort_wmarks);
+
+static struct damon_attrs damon_lru_sort_mon_attrs = {
+ .sample_interval = 5000, /* 5 ms */
+ .aggr_interval = 100000, /* 100 ms */
+ .ops_update_interval = 0,
+ .min_nr_regions = 10,
+ .max_nr_regions = 1000,
+};
+DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(damon_lru_sort_mon_attrs);
+
+/*
+ * Start of the target memory region in physical address.
+ *
+ * The start physical address of memory region that DAMON_LRU_SORT will do work
+ * against. By default, biggest System RAM is used as the region.
+ */
+static unsigned long monitor_region_start __read_mostly;
+module_param(monitor_region_start, ulong, 0600);
+
+/*
+ * End of the target memory region in physical address.
+ *
+ * The end physical address of memory region that DAMON_LRU_SORT will do work
+ * against. By default, biggest System RAM is used as the region.
+ */
+static unsigned long monitor_region_end __read_mostly;
+module_param(monitor_region_end, ulong, 0600);
+
+/*
+ * Scale factor for DAMON_LRU_SORT to ops address conversion.
+ *
+ * This parameter must not be set to 0.
+ */
+static unsigned long addr_unit __read_mostly = 1;
+
+/*
+ * PID of the DAMON thread
+ *
+ * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread.
+ * Else, -1.
+ */
+static int kdamond_pid __read_mostly = -1;
+module_param(kdamond_pid, int, 0400);
+
+static struct damos_stat damon_lru_sort_hot_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_hot_stat,
+ lru_sort_tried_hot_regions, lru_sorted_hot_regions,
+ hot_quota_exceeds);
+
+static struct damos_stat damon_lru_sort_cold_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_cold_stat,
+ lru_sort_tried_cold_regions, lru_sorted_cold_regions,
+ cold_quota_exceeds);
+
+static struct damos_access_pattern damon_lru_sort_stub_pattern = {
+ /* Find regions having PAGE_SIZE or larger size */
+ .min_sz_region = PAGE_SIZE,
+ .max_sz_region = ULONG_MAX,
+ /* no matter its access frequency */
+ .min_nr_accesses = 0,
+ .max_nr_accesses = UINT_MAX,
+ /* no matter its age */
+ .min_age_region = 0,
+ .max_age_region = UINT_MAX,
+};
+
+static struct damon_ctx *ctx;
+static struct damon_target *target;
+
+static struct damos *damon_lru_sort_new_scheme(
+ struct damos_access_pattern *pattern, enum damos_action action)
+{
+ struct damos_quota quota = damon_lru_sort_quota;
+
+ /* Use half of total quota for hot/cold pages sorting */
+ quota.ms = quota.ms / 2;
+
+ return damon_new_scheme(
+ /* find the pattern, and */
+ pattern,
+ /* (de)prioritize on LRU-lists */
+ action,
+ /* for each aggregation interval */
+ 0,
+ /* under the quota. */
+ &quota,
+ /* (De)activate this according to the watermarks. */
+ &damon_lru_sort_wmarks,
+ NUMA_NO_NODE);
+}
+
+/* Create a DAMON-based operation scheme for hot memory regions */
+static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres)
+{
+ struct damos_access_pattern pattern = damon_lru_sort_stub_pattern;
+
+ pattern.min_nr_accesses = hot_thres;
+ return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_PRIO);
+}
+
+/* Create a DAMON-based operation scheme for cold memory regions */
+static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
+{
+ struct damos_access_pattern pattern = damon_lru_sort_stub_pattern;
+
+ pattern.max_nr_accesses = 0;
+ pattern.min_age_region = cold_thres;
+ return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO);
+}
+
+static int damon_lru_sort_apply_parameters(void)
+{
+ struct damon_ctx *param_ctx;
+ struct damon_target *param_target;
+ struct damos *hot_scheme, *cold_scheme;
+ unsigned int hot_thres, cold_thres;
+ int err;
+
+ err = damon_modules_new_paddr_ctx_target(&param_ctx, &param_target);
+ if (err)
+ return err;
+
+ /*
+ * If monitor_region_start/end are unset, always silently
+ * reset addr_unit to 1.
+ */
+ if (!monitor_region_start && !monitor_region_end)
+ addr_unit = 1;
+ param_ctx->addr_unit = addr_unit;
+ param_ctx->min_sz_region = max(DAMON_MIN_REGION / addr_unit, 1);
+
+ if (!damon_lru_sort_mon_attrs.sample_interval) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = damon_set_attrs(param_ctx, &damon_lru_sort_mon_attrs);
+ if (err)
+ goto out;
+
+ err = -ENOMEM;
+ hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) *
+ hot_thres_access_freq / 1000;
+ hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres);
+ if (!hot_scheme)
+ goto out;
+
+ cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval;
+ cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres);
+ if (!cold_scheme) {
+ damon_destroy_scheme(hot_scheme);
+ goto out;
+ }
+
+ damon_set_schemes(param_ctx, &hot_scheme, 1);
+ damon_add_scheme(param_ctx, cold_scheme);
+
+ err = damon_set_region_biggest_system_ram_default(param_target,
+ &monitor_region_start,
+ &monitor_region_end,
+ param_ctx->min_sz_region);
+ if (err)
+ goto out;
+ err = damon_commit_ctx(ctx, param_ctx);
+out:
+ damon_destroy_ctx(param_ctx);
+ return err;
+}
+
+static int damon_lru_sort_handle_commit_inputs(void)
+{
+ int err;
+
+ if (!commit_inputs)
+ return 0;
+
+ err = damon_lru_sort_apply_parameters();
+ commit_inputs = false;
+ return err;
+}
+
+static int damon_lru_sort_damon_call_fn(void *arg)
+{
+ struct damon_ctx *c = arg;
+ struct damos *s;
+
+ /* update the stats parameter */
+ damon_for_each_scheme(s, c) {
+ if (s->action == DAMOS_LRU_PRIO)
+ damon_lru_sort_hot_stat = s->stat;
+ else if (s->action == DAMOS_LRU_DEPRIO)
+ damon_lru_sort_cold_stat = s->stat;
+ }
+
+ return damon_lru_sort_handle_commit_inputs();
+}
+
+static struct damon_call_control call_control = {
+ .fn = damon_lru_sort_damon_call_fn,
+ .repeat = true,
+};
+
+static int damon_lru_sort_turn(bool on)
+{
+ int err;
+
+ if (!on) {
+ err = damon_stop(&ctx, 1);
+ if (!err)
+ kdamond_pid = -1;
+ return err;
+ }
+
+ err = damon_lru_sort_apply_parameters();
+ if (err)
+ return err;
+
+ err = damon_start(&ctx, 1, true);
+ if (err)
+ return err;
+ kdamond_pid = ctx->kdamond->pid;
+ return damon_call(ctx, &call_control);
+}
+
+static int damon_lru_sort_addr_unit_store(const char *val,
+ const struct kernel_param *kp)
+{
+ unsigned long input_addr_unit;
+ int err = kstrtoul(val, 0, &input_addr_unit);
+
+ if (err)
+ return err;
+ if (!input_addr_unit)
+ return -EINVAL;
+
+ addr_unit = input_addr_unit;
+ return 0;
+}
+
+static const struct kernel_param_ops addr_unit_param_ops = {
+ .set = damon_lru_sort_addr_unit_store,
+ .get = param_get_ulong,
+};
+
+module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600);
+MODULE_PARM_DESC(addr_unit,
+ "Scale factor for DAMON_LRU_SORT to ops address conversion (default: 1)");
+
+static int damon_lru_sort_enabled_store(const char *val,
+ const struct kernel_param *kp)
+{
+ bool is_enabled = enabled;
+ bool enable;
+ int err;
+
+ err = kstrtobool(val, &enable);
+ if (err)
+ return err;
+
+ if (is_enabled == enable)
+ return 0;
+
+ /* Called before init function. The function will handle this. */
+ if (!damon_initialized())
+ goto set_param_out;
+
+ err = damon_lru_sort_turn(enable);
+ if (err)
+ return err;
+
+set_param_out:
+ enabled = enable;
+ return err;
+}
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_lru_sort_enabled_store,
+ .get = param_get_bool,
+};
+
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled,
+ "Enable or disable DAMON_LRU_SORT (default: disabled)");
+
+static int __init damon_lru_sort_init(void)
+{
+ int err;
+
+ if (!damon_initialized()) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = damon_modules_new_paddr_ctx_target(&ctx, &target);
+ if (err)
+ goto out;
+
+ call_control.data = ctx;
+
+ /* 'enabled' has set before this function, probably via command line */
+ if (enabled)
+ err = damon_lru_sort_turn(true);
+
+out:
+ if (err && enabled)
+ enabled = false;
+ return err;
+}
+
+module_init(damon_lru_sort_init);
diff --git a/mm/damon/modules-common.c b/mm/damon/modules-common.c
new file mode 100644
index 000000000000..86d58f8c4f63
--- /dev/null
+++ b/mm/damon/modules-common.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Code for DAMON Modules
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/damon.h>
+
+#include "modules-common.h"
+
+/*
+ * Allocate, set, and return a DAMON context for the physical address space.
+ * @ctxp: Pointer to save the point to the newly created context
+ * @targetp: Pointer to save the point to the newly created target
+ */
+int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp,
+ struct damon_target **targetp)
+{
+ struct damon_ctx *ctx;
+ struct damon_target *target;
+
+ ctx = damon_new_ctx();
+ if (!ctx)
+ return -ENOMEM;
+
+ if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
+ damon_destroy_ctx(ctx);
+ return -EINVAL;
+ }
+
+ target = damon_new_target();
+ if (!target) {
+ damon_destroy_ctx(ctx);
+ return -ENOMEM;
+ }
+ damon_add_target(ctx, target);
+
+ *ctxp = ctx;
+ *targetp = target;
+ return 0;
+}
diff --git a/mm/damon/modules-common.h b/mm/damon/modules-common.h
new file mode 100644
index 000000000000..f103ad556368
--- /dev/null
+++ b/mm/damon/modules-common.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common Code for DAMON Modules
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/moduleparam.h>
+
+#define DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(attrs) \
+ module_param_named(sample_interval, attrs.sample_interval, \
+ ulong, 0600); \
+ module_param_named(aggr_interval, attrs.aggr_interval, ulong, \
+ 0600); \
+ module_param_named(min_nr_regions, attrs.min_nr_regions, ulong, \
+ 0600); \
+ module_param_named(max_nr_regions, attrs.max_nr_regions, ulong, \
+ 0600);
+
+#define DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(quota) \
+ module_param_named(quota_ms, quota.ms, ulong, 0600); \
+ module_param_named(quota_reset_interval_ms, \
+ quota.reset_interval, ulong, 0600);
+
+#define DEFINE_DAMON_MODULES_DAMOS_QUOTAS(quota) \
+ DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(quota) \
+ module_param_named(quota_sz, quota.sz, ulong, 0600);
+
+#define DEFINE_DAMON_MODULES_WMARKS_PARAMS(wmarks) \
+ module_param_named(wmarks_interval, wmarks.interval, ulong, \
+ 0600); \
+ module_param_named(wmarks_high, wmarks.high, ulong, 0600); \
+ module_param_named(wmarks_mid, wmarks.mid, ulong, 0600); \
+ module_param_named(wmarks_low, wmarks.low, ulong, 0600);
+
+#define DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(stat, try_name, \
+ succ_name, qt_exceed_name) \
+ module_param_named(nr_##try_name, stat.nr_tried, ulong, 0400); \
+ module_param_named(bytes_##try_name, stat.sz_tried, ulong, \
+ 0400); \
+ module_param_named(nr_##succ_name, stat.nr_applied, ulong, \
+ 0400); \
+ module_param_named(bytes_##succ_name, stat.sz_applied, ulong, \
+ 0400); \
+ module_param_named(nr_##qt_exceed_name, stat.qt_exceeds, ulong, \
+ 0400);
+
+int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp,
+ struct damon_target **targetp);
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
new file mode 100644
index 000000000000..a218d9922234
--- /dev/null
+++ b/mm/damon/ops-common.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Code for Data Access Monitoring
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/migrate.h>
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/leafops.h>
+
+#include "../internal.h"
+#include "ops-common.h"
+
+/*
+ * Get an online page for a pfn if it's in the LRU list. Otherwise, returns
+ * NULL.
+ *
+ * The body of this function is stolen from the 'page_idle_get_folio()'. We
+ * steal rather than reuse it because the code is quite simple.
+ */
+struct folio *damon_get_folio(unsigned long pfn)
+{
+ struct page *page = pfn_to_online_page(pfn);
+ struct folio *folio;
+
+ if (!page)
+ return NULL;
+
+ folio = page_folio(page);
+ if (!folio_test_lru(folio) || !folio_try_get(folio))
+ return NULL;
+ if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
+ folio_put(folio);
+ folio = NULL;
+ }
+ return folio;
+}
+
+void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
+{
+ pte_t pteval = ptep_get(pte);
+ struct folio *folio;
+ bool young = false;
+ unsigned long pfn;
+
+ if (likely(pte_present(pteval)))
+ pfn = pte_pfn(pteval);
+ else
+ pfn = softleaf_to_pfn(softleaf_from_pte(pteval));
+
+ folio = damon_get_folio(pfn);
+ if (!folio)
+ return;
+
+ /*
+ * PFN swap PTEs, such as device-exclusive ones, that actually map pages
+ * are "old" from a CPU perspective. The MMU notifier takes care of any
+ * device aspects.
+ */
+ if (likely(pte_present(pteval)))
+ young |= ptep_test_and_clear_young(vma, addr, pte);
+ young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
+ if (young)
+ folio_set_young(folio);
+
+ folio_set_idle(folio);
+ folio_put(folio);
+}
+
+void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pmd_t pmdval = pmdp_get(pmd);
+ struct folio *folio;
+ bool young = false;
+ unsigned long pfn;
+
+ if (likely(pmd_present(pmdval)))
+ pfn = pmd_pfn(pmdval);
+ else
+ pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval));
+
+ folio = damon_get_folio(pfn);
+ if (!folio)
+ return;
+
+ if (likely(pmd_present(pmdval)))
+ young |= pmdp_clear_young_notify(vma, addr, pmd);
+ young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE);
+ if (young)
+ folio_set_young(folio);
+
+ folio_set_idle(folio);
+ folio_put(folio);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+}
+
+#define DAMON_MAX_SUBSCORE (100)
+#define DAMON_MAX_AGE_IN_LOG (32)
+
+int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
+ struct damos *s)
+{
+ int freq_subscore;
+ unsigned int age_in_sec;
+ int age_in_log, age_subscore;
+ unsigned int freq_weight = s->quota.weight_nr_accesses;
+ unsigned int age_weight = s->quota.weight_age;
+ int hotness;
+
+ freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
+ damon_max_nr_accesses(&c->attrs);
+
+ age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
+ for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
+ age_in_log++, age_in_sec >>= 1)
+ ;
+
+ /* If frequency is 0, higher age means it's colder */
+ if (freq_subscore == 0)
+ age_in_log *= -1;
+
+ /*
+ * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
+ * Scale it to be in [0, 100] and set it as age subscore.
+ */
+ age_in_log += DAMON_MAX_AGE_IN_LOG;
+ age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
+ DAMON_MAX_AGE_IN_LOG / 2;
+
+ hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
+ if (freq_weight + age_weight)
+ hotness /= freq_weight + age_weight;
+ /*
+ * Transform it to fit in [0, DAMOS_MAX_SCORE]
+ */
+ hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;
+
+ return hotness;
+}
+
+int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
+ struct damos *s)
+{
+ int hotness = damon_hot_score(c, r, s);
+
+ /* Return coldness of the region */
+ return DAMOS_MAX_SCORE - hotness;
+}
+
+static bool damon_folio_mkold_one(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr, void *arg)
+{
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
+
+ while (page_vma_mapped_walk(&pvmw)) {
+ addr = pvmw.address;
+ if (pvmw.pte)
+ damon_ptep_mkold(pvmw.pte, vma, addr);
+ else
+ damon_pmdp_mkold(pvmw.pmd, vma, addr);
+ }
+ return true;
+}
+
+void damon_folio_mkold(struct folio *folio)
+{
+ struct rmap_walk_control rwc = {
+ .rmap_one = damon_folio_mkold_one,
+ .anon_lock = folio_lock_anon_vma_read,
+ };
+
+ if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
+ folio_set_idle(folio);
+ return;
+ }
+
+ if (!folio_trylock(folio))
+ return;
+
+ rmap_walk(folio, &rwc);
+ folio_unlock(folio);
+
+}
+
+static bool damon_folio_young_one(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr, void *arg)
+{
+ bool *accessed = arg;
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
+ pte_t pte;
+
+ *accessed = false;
+ while (page_vma_mapped_walk(&pvmw)) {
+ addr = pvmw.address;
+ if (pvmw.pte) {
+ pte = ptep_get(pvmw.pte);
+
+ /*
+ * PFN swap PTEs, such as device-exclusive ones, that
+ * actually map pages are "old" from a CPU perspective.
+ * The MMU notifier takes care of any device aspects.
+ */
+ *accessed = (pte_present(pte) && pte_young(pte)) ||
+ !folio_test_idle(folio) ||
+ mmu_notifier_test_young(vma->vm_mm, addr);
+ } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pmd_t pmd = pmdp_get(pvmw.pmd);
+
+ *accessed = (pmd_present(pmd) && pmd_young(pmd)) ||
+ !folio_test_idle(folio) ||
+ mmu_notifier_test_young(vma->vm_mm, addr);
+#else
+ WARN_ON_ONCE(1);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ }
+ if (*accessed) {
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ }
+
+ /* If accessed, stop walking */
+ return *accessed == false;
+}
+
+bool damon_folio_young(struct folio *folio)
+{
+ bool accessed = false;
+ struct rmap_walk_control rwc = {
+ .arg = &accessed,
+ .rmap_one = damon_folio_young_one,
+ .anon_lock = folio_lock_anon_vma_read,
+ };
+
+ if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
+ if (folio_test_idle(folio))
+ return false;
+ else
+ return true;
+ }
+
+ if (!folio_trylock(folio))
+ return false;
+
+ rmap_walk(folio, &rwc);
+ folio_unlock(folio);
+
+ return accessed;
+}
+
+bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio)
+{
+ bool matched = false;
+ struct mem_cgroup *memcg;
+ size_t folio_sz;
+
+ switch (filter->type) {
+ case DAMOS_FILTER_TYPE_ANON:
+ matched = folio_test_anon(folio);
+ break;
+ case DAMOS_FILTER_TYPE_ACTIVE:
+ matched = folio_test_active(folio);
+ break;
+ case DAMOS_FILTER_TYPE_MEMCG:
+ rcu_read_lock();
+ memcg = folio_memcg_check(folio);
+ if (!memcg)
+ matched = false;
+ else
+ matched = filter->memcg_id == mem_cgroup_id(memcg);
+ rcu_read_unlock();
+ break;
+ case DAMOS_FILTER_TYPE_YOUNG:
+ matched = damon_folio_young(folio);
+ if (matched)
+ damon_folio_mkold(folio);
+ break;
+ case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
+ folio_sz = folio_size(folio);
+ matched = filter->sz_range.min <= folio_sz &&
+ folio_sz <= filter->sz_range.max;
+ break;
+ case DAMOS_FILTER_TYPE_UNMAPPED:
+ matched = !folio_mapped(folio) || !folio_raw_mapping(folio);
+ break;
+ default:
+ break;
+ }
+
+ return matched == filter->matching;
+}
+
+static unsigned int __damon_migrate_folio_list(
+ struct list_head *migrate_folios, struct pglist_data *pgdat,
+ int target_nid)
+{
+ unsigned int nr_succeeded = 0;
+ struct migration_target_control mtc = {
+ /*
+ * Allocate from 'node', or fail quickly and quietly.
+ * When this happens, 'page' will likely just be discarded
+ * instead of migrated.
+ */
+ .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+ __GFP_NOMEMALLOC | GFP_NOWAIT,
+ .nid = target_nid,
+ };
+
+ if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
+ return 0;
+
+ if (list_empty(migrate_folios))
+ return 0;
+
+ /* Migration ignores all cpuset and mempolicy settings */
+ migrate_pages(migrate_folios, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_ASYNC, MR_DAMON,
+ &nr_succeeded);
+
+ return nr_succeeded;
+}
+
+static unsigned int damon_migrate_folio_list(struct list_head *folio_list,
+ struct pglist_data *pgdat,
+ int target_nid)
+{
+ unsigned int nr_migrated = 0;
+ struct folio *folio;
+ LIST_HEAD(ret_folios);
+ LIST_HEAD(migrate_folios);
+
+ while (!list_empty(folio_list)) {
+ struct folio *folio;
+
+ cond_resched();
+
+ folio = lru_to_folio(folio_list);
+ list_del(&folio->lru);
+
+ if (!folio_trylock(folio))
+ goto keep;
+
+ /* Relocate its contents to another node. */
+ list_add(&folio->lru, &migrate_folios);
+ folio_unlock(folio);
+ continue;
+keep:
+ list_add(&folio->lru, &ret_folios);
+ }
+ /* 'folio_list' is always empty here */
+
+ /* Migrate folios selected for migration */
+ nr_migrated += __damon_migrate_folio_list(
+ &migrate_folios, pgdat, target_nid);
+ /*
+ * Folios that could not be migrated are still in @migrate_folios. Add
+ * those back on @folio_list
+ */
+ if (!list_empty(&migrate_folios))
+ list_splice_init(&migrate_folios, folio_list);
+
+ try_to_unmap_flush();
+
+ list_splice(&ret_folios, folio_list);
+
+ while (!list_empty(folio_list)) {
+ folio = lru_to_folio(folio_list);
+ list_del(&folio->lru);
+ folio_putback_lru(folio);
+ }
+
+ return nr_migrated;
+}
+
+unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
+{
+ int nid;
+ unsigned long nr_migrated = 0;
+ LIST_HEAD(node_folio_list);
+ unsigned int noreclaim_flag;
+
+ if (list_empty(folio_list))
+ return nr_migrated;
+
+ if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
+ !node_state(target_nid, N_MEMORY))
+ return nr_migrated;
+
+ noreclaim_flag = memalloc_noreclaim_save();
+
+ nid = folio_nid(lru_to_folio(folio_list));
+ do {
+ struct folio *folio = lru_to_folio(folio_list);
+
+ if (nid == folio_nid(folio)) {
+ list_move(&folio->lru, &node_folio_list);
+ continue;
+ }
+
+ nr_migrated += damon_migrate_folio_list(&node_folio_list,
+ NODE_DATA(nid),
+ target_nid);
+ nid = folio_nid(lru_to_folio(folio_list));
+ } while (!list_empty(folio_list));
+
+ nr_migrated += damon_migrate_folio_list(&node_folio_list,
+ NODE_DATA(nid),
+ target_nid);
+
+ memalloc_noreclaim_restore(noreclaim_flag);
+
+ return nr_migrated;
+}
+
+bool damos_ops_has_filter(struct damos *s)
+{
+ struct damos_filter *f;
+
+ damos_for_each_ops_filter(f, s)
+ return true;
+ return false;
+}
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
new file mode 100644
index 000000000000..5efa5b5970de
--- /dev/null
+++ b/mm/damon/ops-common.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common Code for Data Access Monitoring
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/damon.h>
+
+struct folio *damon_get_folio(unsigned long pfn);
+
+void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr);
+void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr);
+void damon_folio_mkold(struct folio *folio);
+bool damon_folio_young(struct folio *folio);
+
+int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
+ struct damos *s);
+int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
+ struct damos *s);
+
+bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio);
+unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid);
+
+bool damos_ops_has_filter(struct damos *s);
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
new file mode 100644
index 000000000000..07a8aead439e
--- /dev/null
+++ b/mm/damon/paddr.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON Code for The Physical Address Space
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-pa: " fmt
+
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/memory-tiers.h>
+#include <linux/mm_inline.h>
+
+#include "../internal.h"
+#include "ops-common.h"
+
+static phys_addr_t damon_pa_phys_addr(
+ unsigned long addr, unsigned long addr_unit)
+{
+ return (phys_addr_t)addr * addr_unit;
+}
+
+static unsigned long damon_pa_core_addr(
+ phys_addr_t pa, unsigned long addr_unit)
+{
+ /*
+ * Use div_u64() for avoiding linking errors related with __udivdi3,
+ * __aeabi_uldivmod, or similar problems. This should also improve the
+ * performance optimization (read div_u64() comment for the detail).
+ */
+ if (sizeof(pa) == 8 && sizeof(addr_unit) == 4)
+ return div_u64(pa, addr_unit);
+ return pa / addr_unit;
+}
+
+static void damon_pa_mkold(phys_addr_t paddr)
+{
+ struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
+
+ if (!folio)
+ return;
+
+ damon_folio_mkold(folio);
+ folio_put(folio);
+}
+
+static void __damon_pa_prepare_access_check(struct damon_region *r,
+ unsigned long addr_unit)
+{
+ r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
+
+ damon_pa_mkold(damon_pa_phys_addr(r->sampling_addr, addr_unit));
+}
+
+static void damon_pa_prepare_access_checks(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region(r, t)
+ __damon_pa_prepare_access_check(r, ctx->addr_unit);
+ }
+}
+
+static bool damon_pa_young(phys_addr_t paddr, unsigned long *folio_sz)
+{
+ struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
+ bool accessed;
+
+ if (!folio)
+ return false;
+
+ accessed = damon_folio_young(folio);
+ *folio_sz = folio_size(folio);
+ folio_put(folio);
+ return accessed;
+}
+
+static void __damon_pa_check_access(struct damon_region *r,
+ struct damon_attrs *attrs, unsigned long addr_unit)
+{
+ static phys_addr_t last_addr;
+ static unsigned long last_folio_sz = PAGE_SIZE;
+ static bool last_accessed;
+ phys_addr_t sampling_addr = damon_pa_phys_addr(
+ r->sampling_addr, addr_unit);
+
+ /* If the region is in the last checked page, reuse the result */
+ if (ALIGN_DOWN(last_addr, last_folio_sz) ==
+ ALIGN_DOWN(sampling_addr, last_folio_sz)) {
+ damon_update_region_access_rate(r, last_accessed, attrs);
+ return;
+ }
+
+ last_accessed = damon_pa_young(sampling_addr, &last_folio_sz);
+ damon_update_region_access_rate(r, last_accessed, attrs);
+
+ last_addr = sampling_addr;
+}
+
+static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned int max_nr_accesses = 0;
+
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region(r, t) {
+ __damon_pa_check_access(
+ r, &ctx->attrs, ctx->addr_unit);
+ max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+ }
+ }
+
+ return max_nr_accesses;
+}
+
+/*
+ * damos_pa_filter_out - Return true if the page should be filtered out.
+ */
+static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
+{
+ struct damos_filter *filter;
+
+ if (scheme->core_filters_allowed)
+ return false;
+
+ damos_for_each_ops_filter(filter, scheme) {
+ if (damos_folio_filter_match(filter, folio))
+ return !filter->allow;
+ }
+ return scheme->ops_filters_default_reject;
+}
+
+static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s)
+{
+ if (!folio)
+ return true;
+ if (folio == s->last_applied) {
+ folio_put(folio);
+ return true;
+ }
+ return false;
+}
+
+static unsigned long damon_pa_pageout(struct damon_region *r,
+ unsigned long addr_unit, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ phys_addr_t addr, applied;
+ LIST_HEAD(folio_list);
+ bool install_young_filter = true;
+ struct damos_filter *filter;
+ struct folio *folio;
+
+ /* check access in page level again by default */
+ damos_for_each_ops_filter(filter, s) {
+ if (filter->type == DAMOS_FILTER_TYPE_YOUNG) {
+ install_young_filter = false;
+ break;
+ }
+ }
+ if (install_young_filter) {
+ filter = damos_new_filter(
+ DAMOS_FILTER_TYPE_YOUNG, true, false);
+ if (!filter)
+ return 0;
+ damos_add_filter(s, filter);
+ }
+
+ addr = damon_pa_phys_addr(r->ar.start, addr_unit);
+ while (addr < damon_pa_phys_addr(r->ar.end, addr_unit)) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
+ addr += PAGE_SIZE;
+ continue;
+ }
+
+ if (damos_pa_filter_out(s, folio))
+ goto put_folio;
+ else
+ *sz_filter_passed += folio_size(folio) / addr_unit;
+
+ folio_clear_referenced(folio);
+ folio_test_clear_young(folio);
+ if (!folio_isolate_lru(folio))
+ goto put_folio;
+ if (folio_test_unevictable(folio))
+ folio_putback_lru(folio);
+ else
+ list_add(&folio->lru, &folio_list);
+put_folio:
+ addr += folio_size(folio);
+ folio_put(folio);
+ }
+ if (install_young_filter)
+ damos_destroy_filter(filter);
+ applied = reclaim_pages(&folio_list);
+ cond_resched();
+ s->last_applied = folio;
+ return damon_pa_core_addr(applied * PAGE_SIZE, addr_unit);
+}
+
+static inline unsigned long damon_pa_mark_accessed_or_deactivate(
+ struct damon_region *r, unsigned long addr_unit,
+ struct damos *s, bool mark_accessed,
+ unsigned long *sz_filter_passed)
+{
+ phys_addr_t addr, applied = 0;
+ struct folio *folio;
+
+ addr = damon_pa_phys_addr(r->ar.start, addr_unit);
+ while (addr < damon_pa_phys_addr(r->ar.end, addr_unit)) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
+ addr += PAGE_SIZE;
+ continue;
+ }
+
+ if (damos_pa_filter_out(s, folio))
+ goto put_folio;
+ else
+ *sz_filter_passed += folio_size(folio) / addr_unit;
+
+ if (mark_accessed)
+ folio_mark_accessed(folio);
+ else
+ folio_deactivate(folio);
+ applied += folio_nr_pages(folio);
+put_folio:
+ addr += folio_size(folio);
+ folio_put(folio);
+ }
+ s->last_applied = folio;
+ return damon_pa_core_addr(applied * PAGE_SIZE, addr_unit);
+}
+
+static unsigned long damon_pa_mark_accessed(struct damon_region *r,
+ unsigned long addr_unit, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ return damon_pa_mark_accessed_or_deactivate(r, addr_unit, s, true,
+ sz_filter_passed);
+}
+
+static unsigned long damon_pa_deactivate_pages(struct damon_region *r,
+ unsigned long addr_unit, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ return damon_pa_mark_accessed_or_deactivate(r, addr_unit, s, false,
+ sz_filter_passed);
+}
+
+static unsigned long damon_pa_migrate(struct damon_region *r,
+ unsigned long addr_unit, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ phys_addr_t addr, applied;
+ LIST_HEAD(folio_list);
+ struct folio *folio;
+
+ addr = damon_pa_phys_addr(r->ar.start, addr_unit);
+ while (addr < damon_pa_phys_addr(r->ar.end, addr_unit)) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
+ addr += PAGE_SIZE;
+ continue;
+ }
+
+ if (damos_pa_filter_out(s, folio))
+ goto put_folio;
+ else
+ *sz_filter_passed += folio_size(folio) / addr_unit;
+
+ if (!folio_isolate_lru(folio))
+ goto put_folio;
+ list_add(&folio->lru, &folio_list);
+put_folio:
+ addr += folio_size(folio);
+ folio_put(folio);
+ }
+ applied = damon_migrate_pages(&folio_list, s->target_nid);
+ cond_resched();
+ s->last_applied = folio;
+ return damon_pa_core_addr(applied * PAGE_SIZE, addr_unit);
+}
+
+static unsigned long damon_pa_stat(struct damon_region *r,
+ unsigned long addr_unit, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ phys_addr_t addr;
+ struct folio *folio;
+
+ if (!damos_ops_has_filter(s))
+ return 0;
+
+ addr = damon_pa_phys_addr(r->ar.start, addr_unit);
+ while (addr < damon_pa_phys_addr(r->ar.end, addr_unit)) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
+ addr += PAGE_SIZE;
+ continue;
+ }
+
+ if (!damos_pa_filter_out(s, folio))
+ *sz_filter_passed += folio_size(folio) / addr_unit;
+ addr += folio_size(folio);
+ folio_put(folio);
+ }
+ s->last_applied = folio;
+ return 0;
+}
+
+static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme, unsigned long *sz_filter_passed)
+{
+ unsigned long aunit = ctx->addr_unit;
+
+ switch (scheme->action) {
+ case DAMOS_PAGEOUT:
+ return damon_pa_pageout(r, aunit, scheme, sz_filter_passed);
+ case DAMOS_LRU_PRIO:
+ return damon_pa_mark_accessed(r, aunit, scheme,
+ sz_filter_passed);
+ case DAMOS_LRU_DEPRIO:
+ return damon_pa_deactivate_pages(r, aunit, scheme,
+ sz_filter_passed);
+ case DAMOS_MIGRATE_HOT:
+ case DAMOS_MIGRATE_COLD:
+ return damon_pa_migrate(r, aunit, scheme, sz_filter_passed);
+ case DAMOS_STAT:
+ return damon_pa_stat(r, aunit, scheme, sz_filter_passed);
+ default:
+ /* DAMOS actions that not yet supported by 'paddr'. */
+ break;
+ }
+ return 0;
+}
+
+static int damon_pa_scheme_score(struct damon_ctx *context,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
+{
+ switch (scheme->action) {
+ case DAMOS_PAGEOUT:
+ return damon_cold_score(context, r, scheme);
+ case DAMOS_LRU_PRIO:
+ return damon_hot_score(context, r, scheme);
+ case DAMOS_LRU_DEPRIO:
+ return damon_cold_score(context, r, scheme);
+ case DAMOS_MIGRATE_HOT:
+ return damon_hot_score(context, r, scheme);
+ case DAMOS_MIGRATE_COLD:
+ return damon_cold_score(context, r, scheme);
+ default:
+ break;
+ }
+
+ return DAMOS_MAX_SCORE;
+}
+
+static int __init damon_pa_initcall(void)
+{
+ struct damon_operations ops = {
+ .id = DAMON_OPS_PADDR,
+ .init = NULL,
+ .update = NULL,
+ .prepare_access_checks = damon_pa_prepare_access_checks,
+ .check_accesses = damon_pa_check_accesses,
+ .target_valid = NULL,
+ .cleanup = NULL,
+ .apply_scheme = damon_pa_apply_scheme,
+ .get_scheme_score = damon_pa_scheme_score,
+ };
+
+ return damon_register_ops(&ops);
+};
+
+subsys_initcall(damon_pa_initcall);
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
new file mode 100644
index 000000000000..36a582e09eae
--- /dev/null
+++ b/mm/damon/reclaim.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON-based page reclamation
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-reclaim: " fmt
+
+#include <linux/damon.h>
+#include <linux/kstrtox.h>
+#include <linux/module.h>
+
+#include "modules-common.h"
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_reclaim."
+
+/*
+ * Enable or disable DAMON_RECLAIM.
+ *
+ * You can enable DAMON_RCLAIM by setting the value of this parameter as ``Y``.
+ * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could
+ * do no real monitoring and reclamation due to the watermarks-based activation
+ * condition. Refer to below descriptions for the watermarks parameter for
+ * this.
+ */
+static bool enabled __read_mostly;
+
+/*
+ * Make DAMON_RECLAIM reads the input parameters again, except ``enabled``.
+ *
+ * Input parameters that updated while DAMON_RECLAIM is running are not applied
+ * by default. Once this parameter is set as ``Y``, DAMON_RECLAIM reads values
+ * of parametrs except ``enabled`` again. Once the re-reading is done, this
+ * parameter is set as ``N``. If invalid parameters are found while the
+ * re-reading, DAMON_RECLAIM will be disabled.
+ */
+static bool commit_inputs __read_mostly;
+module_param(commit_inputs, bool, 0600);
+
+/*
+ * Time threshold for cold memory regions identification in microseconds.
+ *
+ * If a memory region is not accessed for this or longer time, DAMON_RECLAIM
+ * identifies the region as cold, and reclaims. 120 seconds by default.
+ */
+static unsigned long min_age __read_mostly = 120000000;
+module_param(min_age, ulong, 0600);
+
+static struct damos_quota damon_reclaim_quota = {
+ /* use up to 10 ms time, reclaim up to 128 MiB per 1 sec by default */
+ .ms = 10,
+ .sz = 128 * 1024 * 1024,
+ .reset_interval = 1000,
+ /* Within the quota, page out older regions first. */
+ .weight_sz = 0,
+ .weight_nr_accesses = 0,
+ .weight_age = 1
+};
+DEFINE_DAMON_MODULES_DAMOS_QUOTAS(damon_reclaim_quota);
+
+/*
+ * Desired level of memory pressure-stall time in microseconds.
+ *
+ * While keeping the caps that set by other quotas, DAMON_RECLAIM automatically
+ * increases and decreases the effective level of the quota aiming this level of
+ * memory pressure is incurred. System-wide ``some`` memory PSI in microseconds
+ * per quota reset interval (``quota_reset_interval_ms``) is collected and
+ * compared to this value to see if the aim is satisfied. Value zero means
+ * disabling this auto-tuning feature.
+ *
+ * Disabled by default.
+ */
+static unsigned long quota_mem_pressure_us __read_mostly;
+module_param(quota_mem_pressure_us, ulong, 0600);
+
+/*
+ * User-specifiable feedback for auto-tuning of the effective quota.
+ *
+ * While keeping the caps that set by other quotas, DAMON_RECLAIM automatically
+ * increases and decreases the effective level of the quota aiming receiving this
+ * feedback of value ``10,000`` from the user. DAMON_RECLAIM assumes the feedback
+ * value and the quota are positively proportional. Value zero means disabling
+ * this auto-tuning feature.
+ *
+ * Disabled by default.
+ *
+ */
+static unsigned long quota_autotune_feedback __read_mostly;
+module_param(quota_autotune_feedback, ulong, 0600);
+
+static struct damos_watermarks damon_reclaim_wmarks = {
+ .metric = DAMOS_WMARK_FREE_MEM_RATE,
+ .interval = 5000000, /* 5 seconds */
+ .high = 500, /* 50 percent */
+ .mid = 400, /* 40 percent */
+ .low = 200, /* 20 percent */
+};
+DEFINE_DAMON_MODULES_WMARKS_PARAMS(damon_reclaim_wmarks);
+
+static struct damon_attrs damon_reclaim_mon_attrs = {
+ .sample_interval = 5000, /* 5 ms */
+ .aggr_interval = 100000, /* 100 ms */
+ .ops_update_interval = 0,
+ .min_nr_regions = 10,
+ .max_nr_regions = 1000,
+};
+DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(damon_reclaim_mon_attrs);
+
+/*
+ * Start of the target memory region in physical address.
+ *
+ * The start physical address of memory region that DAMON_RECLAIM will do work
+ * against. By default, biggest System RAM is used as the region.
+ */
+static unsigned long monitor_region_start __read_mostly;
+module_param(monitor_region_start, ulong, 0600);
+
+/*
+ * End of the target memory region in physical address.
+ *
+ * The end physical address of memory region that DAMON_RECLAIM will do work
+ * against. By default, biggest System RAM is used as the region.
+ */
+static unsigned long monitor_region_end __read_mostly;
+module_param(monitor_region_end, ulong, 0600);
+
+/*
+ * Scale factor for DAMON_RECLAIM to ops address conversion.
+ *
+ * This parameter must not be set to 0.
+ */
+static unsigned long addr_unit __read_mostly = 1;
+
+/*
+ * Skip anonymous pages reclamation.
+ *
+ * If this parameter is set as ``Y``, DAMON_RECLAIM does not reclaim anonymous
+ * pages. By default, ``N``.
+ */
+static bool skip_anon __read_mostly;
+module_param(skip_anon, bool, 0600);
+
+/*
+ * PID of the DAMON thread
+ *
+ * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
+ * Else, -1.
+ */
+static int kdamond_pid __read_mostly = -1;
+module_param(kdamond_pid, int, 0400);
+
+static struct damos_stat damon_reclaim_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_reclaim_stat,
+ reclaim_tried_regions, reclaimed_regions, quota_exceeds);
+
+static struct damon_ctx *ctx;
+static struct damon_target *target;
+
+static struct damos *damon_reclaim_new_scheme(void)
+{
+ struct damos_access_pattern pattern = {
+ /* Find regions having PAGE_SIZE or larger size */
+ .min_sz_region = PAGE_SIZE,
+ .max_sz_region = ULONG_MAX,
+ /* and not accessed at all */
+ .min_nr_accesses = 0,
+ .max_nr_accesses = 0,
+ /* for min_age or more micro-seconds */
+ .min_age_region = min_age /
+ damon_reclaim_mon_attrs.aggr_interval,
+ .max_age_region = UINT_MAX,
+ };
+
+ return damon_new_scheme(
+ &pattern,
+ /* page out those, as soon as found */
+ DAMOS_PAGEOUT,
+ /* for each aggregation interval */
+ 0,
+ /* under the quota. */
+ &damon_reclaim_quota,
+ /* (De)activate this according to the watermarks. */
+ &damon_reclaim_wmarks,
+ NUMA_NO_NODE);
+}
+
+static int damon_reclaim_apply_parameters(void)
+{
+ struct damon_ctx *param_ctx;
+ struct damon_target *param_target;
+ struct damos *scheme;
+ struct damos_quota_goal *goal;
+ struct damos_filter *filter;
+ int err;
+
+ err = damon_modules_new_paddr_ctx_target(&param_ctx, &param_target);
+ if (err)
+ return err;
+
+ /*
+ * If monitor_region_start/end are unset, always silently
+ * reset addr_unit to 1.
+ */
+ if (!monitor_region_start && !monitor_region_end)
+ addr_unit = 1;
+ param_ctx->addr_unit = addr_unit;
+ param_ctx->min_sz_region = max(DAMON_MIN_REGION / addr_unit, 1);
+
+ if (!damon_reclaim_mon_attrs.aggr_interval) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = damon_set_attrs(param_ctx, &damon_reclaim_mon_attrs);
+ if (err)
+ goto out;
+
+ err = -ENOMEM;
+ scheme = damon_reclaim_new_scheme();
+ if (!scheme)
+ goto out;
+ damon_set_schemes(param_ctx, &scheme, 1);
+
+ if (quota_mem_pressure_us) {
+ goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US,
+ quota_mem_pressure_us);
+ if (!goal)
+ goto out;
+ damos_add_quota_goal(&scheme->quota, goal);
+ }
+
+ if (quota_autotune_feedback) {
+ goal = damos_new_quota_goal(DAMOS_QUOTA_USER_INPUT, 10000);
+ if (!goal)
+ goto out;
+ goal->current_value = quota_autotune_feedback;
+ damos_add_quota_goal(&scheme->quota, goal);
+ }
+
+ if (skip_anon) {
+ filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true, false);
+ if (!filter)
+ goto out;
+ damos_add_filter(scheme, filter);
+ }
+
+ err = damon_set_region_biggest_system_ram_default(param_target,
+ &monitor_region_start,
+ &monitor_region_end,
+ param_ctx->min_sz_region);
+ if (err)
+ goto out;
+ err = damon_commit_ctx(ctx, param_ctx);
+out:
+ damon_destroy_ctx(param_ctx);
+ return err;
+}
+
+static int damon_reclaim_handle_commit_inputs(void)
+{
+ int err;
+
+ if (!commit_inputs)
+ return 0;
+
+ err = damon_reclaim_apply_parameters();
+ commit_inputs = false;
+ return err;
+}
+
+static int damon_reclaim_damon_call_fn(void *arg)
+{
+ struct damon_ctx *c = arg;
+ struct damos *s;
+
+ /* update the stats parameter */
+ damon_for_each_scheme(s, c)
+ damon_reclaim_stat = s->stat;
+
+ return damon_reclaim_handle_commit_inputs();
+}
+
+static struct damon_call_control call_control = {
+ .fn = damon_reclaim_damon_call_fn,
+ .repeat = true,
+};
+
+static int damon_reclaim_turn(bool on)
+{
+ int err;
+
+ if (!on) {
+ err = damon_stop(&ctx, 1);
+ if (!err)
+ kdamond_pid = -1;
+ return err;
+ }
+
+ err = damon_reclaim_apply_parameters();
+ if (err)
+ return err;
+
+ err = damon_start(&ctx, 1, true);
+ if (err)
+ return err;
+ kdamond_pid = ctx->kdamond->pid;
+ return damon_call(ctx, &call_control);
+}
+
+static int damon_reclaim_addr_unit_store(const char *val,
+ const struct kernel_param *kp)
+{
+ unsigned long input_addr_unit;
+ int err = kstrtoul(val, 0, &input_addr_unit);
+
+ if (err)
+ return err;
+ if (!input_addr_unit)
+ return -EINVAL;
+
+ addr_unit = input_addr_unit;
+ return 0;
+}
+
+static const struct kernel_param_ops addr_unit_param_ops = {
+ .set = damon_reclaim_addr_unit_store,
+ .get = param_get_ulong,
+};
+
+module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600);
+MODULE_PARM_DESC(addr_unit,
+ "Scale factor for DAMON_RECLAIM to ops address conversion (default: 1)");
+
+static int damon_reclaim_enabled_store(const char *val,
+ const struct kernel_param *kp)
+{
+ bool is_enabled = enabled;
+ bool enable;
+ int err;
+
+ err = kstrtobool(val, &enable);
+ if (err)
+ return err;
+
+ if (is_enabled == enable)
+ return 0;
+
+ /* Called before init function. The function will handle this. */
+ if (!damon_initialized())
+ goto set_param_out;
+
+ err = damon_reclaim_turn(enable);
+ if (err)
+ return err;
+
+set_param_out:
+ enabled = enable;
+ return err;
+}
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_reclaim_enabled_store,
+ .get = param_get_bool,
+};
+
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled,
+ "Enable or disable DAMON_RECLAIM (default: disabled)");
+
+static int __init damon_reclaim_init(void)
+{
+ int err;
+
+ if (!damon_initialized()) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = damon_modules_new_paddr_ctx_target(&ctx, &target);
+ if (err)
+ goto out;
+
+ call_control.data = ctx;
+
+ /* 'enabled' has set before this function, probably via command line */
+ if (enabled)
+ err = damon_reclaim_turn(true);
+
+out:
+ if (err && enabled)
+ enabled = false;
+ return err;
+}
+
+module_init(damon_reclaim_init);
diff --git a/mm/damon/stat.c b/mm/damon/stat.c
new file mode 100644
index 000000000000..ed8e3629d31a
--- /dev/null
+++ b/mm/damon/stat.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shows data access monitoring resutls in simple metrics.
+ */
+
+#define pr_fmt(fmt) "damon-stat: " fmt
+
+#include <linux/damon.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_stat."
+
+static int damon_stat_enabled_store(
+ const char *val, const struct kernel_param *kp);
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_stat_enabled_store,
+ .get = param_get_bool,
+};
+
+static bool enabled __read_mostly = IS_ENABLED(
+ CONFIG_DAMON_STAT_ENABLED_DEFAULT);
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled, "Enable of disable DAMON_STAT");
+
+static unsigned long estimated_memory_bandwidth __read_mostly;
+module_param(estimated_memory_bandwidth, ulong, 0400);
+MODULE_PARM_DESC(estimated_memory_bandwidth,
+ "Estimated memory bandwidth usage in bytes per second");
+
+static long memory_idle_ms_percentiles[101] __read_mostly = {0,};
+module_param_array(memory_idle_ms_percentiles, long, NULL, 0400);
+MODULE_PARM_DESC(memory_idle_ms_percentiles,
+ "Memory idle time percentiles in milliseconds");
+
+static unsigned long aggr_interval_us;
+module_param(aggr_interval_us, ulong, 0400);
+MODULE_PARM_DESC(aggr_interval_us,
+ "Current tuned aggregation interval in microseconds");
+
+static struct damon_ctx *damon_stat_context;
+
+static unsigned long damon_stat_last_refresh_jiffies;
+
+static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned long access_bytes = 0;
+
+ damon_for_each_target(t, c) {
+ damon_for_each_region(r, t)
+ access_bytes += (r->ar.end - r->ar.start) *
+ r->nr_accesses;
+ }
+ estimated_memory_bandwidth = access_bytes * USEC_PER_MSEC *
+ MSEC_PER_SEC / c->attrs.aggr_interval;
+}
+
+static int damon_stat_idletime(const struct damon_region *r)
+{
+ if (r->nr_accesses)
+ return -1 * (r->age + 1);
+ return r->age + 1;
+}
+
+static int damon_stat_cmp_regions(const void *a, const void *b)
+{
+ const struct damon_region *ra = *(const struct damon_region **)a;
+ const struct damon_region *rb = *(const struct damon_region **)b;
+
+ return damon_stat_idletime(ra) - damon_stat_idletime(rb);
+}
+
+static int damon_stat_sort_regions(struct damon_ctx *c,
+ struct damon_region ***sorted_ptr, int *nr_regions_ptr,
+ unsigned long *total_sz_ptr)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ struct damon_region **region_pointers;
+ unsigned int nr_regions = 0;
+ unsigned long total_sz = 0;
+
+ damon_for_each_target(t, c) {
+ /* there is only one target */
+ region_pointers = kmalloc_array(damon_nr_regions(t),
+ sizeof(*region_pointers), GFP_KERNEL);
+ if (!region_pointers)
+ return -ENOMEM;
+ damon_for_each_region(r, t) {
+ region_pointers[nr_regions++] = r;
+ total_sz += r->ar.end - r->ar.start;
+ }
+ }
+ sort(region_pointers, nr_regions, sizeof(*region_pointers),
+ damon_stat_cmp_regions, NULL);
+ *sorted_ptr = region_pointers;
+ *nr_regions_ptr = nr_regions;
+ *total_sz_ptr = total_sz;
+ return 0;
+}
+
+static void damon_stat_set_idletime_percentiles(struct damon_ctx *c)
+{
+ struct damon_region **sorted_regions, *region;
+ int nr_regions;
+ unsigned long total_sz, accounted_bytes = 0;
+ int err, i, next_percentile = 0;
+
+ err = damon_stat_sort_regions(c, &sorted_regions, &nr_regions,
+ &total_sz);
+ if (err)
+ return;
+ for (i = 0; i < nr_regions; i++) {
+ region = sorted_regions[i];
+ accounted_bytes += region->ar.end - region->ar.start;
+ while (next_percentile <= accounted_bytes * 100 / total_sz)
+ memory_idle_ms_percentiles[next_percentile++] =
+ damon_stat_idletime(region) *
+ (long)c->attrs.aggr_interval / USEC_PER_MSEC;
+ }
+ kfree(sorted_regions);
+}
+
+static int damon_stat_damon_call_fn(void *data)
+{
+ struct damon_ctx *c = data;
+
+ /* avoid unnecessarily frequent stat update */
+ if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
+ msecs_to_jiffies(5 * MSEC_PER_SEC)))
+ return 0;
+ damon_stat_last_refresh_jiffies = jiffies;
+
+ aggr_interval_us = c->attrs.aggr_interval;
+ damon_stat_set_estimated_memory_bandwidth(c);
+ damon_stat_set_idletime_percentiles(c);
+ return 0;
+}
+
+static struct damon_ctx *damon_stat_build_ctx(void)
+{
+ struct damon_ctx *ctx;
+ struct damon_attrs attrs;
+ struct damon_target *target;
+ unsigned long start = 0, end = 0;
+
+ ctx = damon_new_ctx();
+ if (!ctx)
+ return NULL;
+ attrs = (struct damon_attrs) {
+ .sample_interval = 5 * USEC_PER_MSEC,
+ .aggr_interval = 100 * USEC_PER_MSEC,
+ .ops_update_interval = 60 * USEC_PER_MSEC * MSEC_PER_SEC,
+ .min_nr_regions = 10,
+ .max_nr_regions = 1000,
+ };
+ /*
+ * auto-tune sampling and aggregation interval aiming 4% DAMON-observed
+ * accesses ratio, keeping sampling interval in [5ms, 10s] range.
+ */
+ attrs.intervals_goal = (struct damon_intervals_goal) {
+ .access_bp = 400, .aggrs = 3,
+ .min_sample_us = 5000, .max_sample_us = 10000000,
+ };
+ if (damon_set_attrs(ctx, &attrs))
+ goto free_out;
+
+ /*
+ * auto-tune sampling and aggregation interval aiming 4% DAMON-observed
+ * accesses ratio, keeping sampling interval in [5ms, 10s] range.
+ */
+ ctx->attrs.intervals_goal = (struct damon_intervals_goal) {
+ .access_bp = 400, .aggrs = 3,
+ .min_sample_us = 5000, .max_sample_us = 10000000,
+ };
+ if (damon_select_ops(ctx, DAMON_OPS_PADDR))
+ goto free_out;
+
+ target = damon_new_target();
+ if (!target)
+ goto free_out;
+ damon_add_target(ctx, target);
+ if (damon_set_region_biggest_system_ram_default(target, &start, &end,
+ ctx->min_sz_region))
+ goto free_out;
+ return ctx;
+free_out:
+ damon_destroy_ctx(ctx);
+ return NULL;
+}
+
+static struct damon_call_control call_control = {
+ .fn = damon_stat_damon_call_fn,
+ .repeat = true,
+};
+
+static int damon_stat_start(void)
+{
+ int err;
+
+ damon_stat_context = damon_stat_build_ctx();
+ if (!damon_stat_context)
+ return -ENOMEM;
+ err = damon_start(&damon_stat_context, 1, true);
+ if (err)
+ return err;
+
+ damon_stat_last_refresh_jiffies = jiffies;
+ call_control.data = damon_stat_context;
+ return damon_call(damon_stat_context, &call_control);
+}
+
+static void damon_stat_stop(void)
+{
+ damon_stop(&damon_stat_context, 1);
+ damon_destroy_ctx(damon_stat_context);
+}
+
+static int damon_stat_enabled_store(
+ const char *val, const struct kernel_param *kp)
+{
+ bool is_enabled = enabled;
+ int err;
+
+ err = kstrtobool(val, &enabled);
+ if (err)
+ return err;
+
+ if (is_enabled == enabled)
+ return 0;
+
+ if (!damon_initialized())
+ /*
+ * probably called from command line parsing (parse_args()).
+ * Cannot call damon_new_ctx(). Let damon_stat_init() handle.
+ */
+ return 0;
+
+ if (enabled) {
+ err = damon_stat_start();
+ if (err)
+ enabled = false;
+ return err;
+ }
+ damon_stat_stop();
+ return 0;
+}
+
+static int __init damon_stat_init(void)
+{
+ int err = 0;
+
+ if (!damon_initialized()) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* probably set via command line */
+ if (enabled)
+ err = damon_stat_start();
+
+out:
+ if (err && enabled)
+ enabled = false;
+ return err;
+}
+
+module_init(damon_stat_init);
diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c
new file mode 100644
index 000000000000..ffaf285e241a
--- /dev/null
+++ b/mm/damon/sysfs-common.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Code for DAMON Sysfs Interface
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/slab.h>
+
+#include "sysfs-common.h"
+
+DEFINE_MUTEX(damon_sysfs_lock);
+
+/*
+ * unsigned long range directory
+ */
+
+struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
+ unsigned long min,
+ unsigned long max)
+{
+ struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range),
+ GFP_KERNEL);
+
+ if (!range)
+ return NULL;
+ range->kobj = (struct kobject){};
+ range->min = min;
+ range->max = max;
+
+ return range;
+}
+
+static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+
+ return sysfs_emit(buf, "%lu\n", range->min);
+}
+
+static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+ unsigned long min;
+ int err;
+
+ err = kstrtoul(buf, 0, &min);
+ if (err)
+ return err;
+
+ range->min = min;
+ return count;
+}
+
+static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+
+ return sysfs_emit(buf, "%lu\n", range->max);
+}
+
+static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+ unsigned long max;
+ int err;
+
+ err = kstrtoul(buf, 0, &max);
+ if (err)
+ return err;
+
+ range->max = max;
+ return count;
+}
+
+void damon_sysfs_ul_range_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_ul_range_min_attr =
+ __ATTR_RW_MODE(min, 0600);
+
+static struct kobj_attribute damon_sysfs_ul_range_max_attr =
+ __ATTR_RW_MODE(max, 0600);
+
+static struct attribute *damon_sysfs_ul_range_attrs[] = {
+ &damon_sysfs_ul_range_min_attr.attr,
+ &damon_sysfs_ul_range_max_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_ul_range);
+
+const struct kobj_type damon_sysfs_ul_range_ktype = {
+ .release = damon_sysfs_ul_range_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_ul_range_groups,
+};
+
diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
new file mode 100644
index 000000000000..2099adee11d0
--- /dev/null
+++ b/mm/damon/sysfs-common.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common Code for DAMON Sysfs Interface
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/damon.h>
+#include <linux/kobject.h>
+
+extern struct mutex damon_sysfs_lock;
+
+struct damon_sysfs_ul_range {
+ struct kobject kobj;
+ unsigned long min;
+ unsigned long max;
+};
+
+struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
+ unsigned long min,
+ unsigned long max);
+void damon_sysfs_ul_range_release(struct kobject *kobj);
+
+extern const struct kobj_type damon_sysfs_ul_range_ktype;
+
+/*
+ * schemes directory
+ */
+
+struct damon_sysfs_schemes {
+ struct kobject kobj;
+ struct damon_sysfs_scheme **schemes_arr;
+ int nr;
+};
+
+struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void);
+void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes);
+
+extern const struct kobj_type damon_sysfs_schemes_ktype;
+
+int damon_sysfs_add_schemes(struct damon_ctx *ctx,
+ struct damon_sysfs_schemes *sysfs_schemes);
+
+void damon_sysfs_schemes_update_stats(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx);
+
+void damos_sysfs_populate_region_dir(struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx, struct damon_target *t,
+ struct damon_region *r, struct damos *s,
+ bool total_bytes_only, unsigned long sz_filter_passed);
+
+int damon_sysfs_schemes_clear_regions(
+ struct damon_sysfs_schemes *sysfs_schemes);
+
+int damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx);
+
+void damos_sysfs_update_effective_quotas(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx);
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
new file mode 100644
index 000000000000..30d20f5b3192
--- /dev/null
+++ b/mm/damon/sysfs-schemes.c
@@ -0,0 +1,2832 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON sysfs Interface
+ *
+ * Copyright (c) 2022 SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/slab.h>
+#include <linux/numa.h>
+
+#include "sysfs-common.h"
+
+/*
+ * scheme region directory
+ */
+
+struct damon_sysfs_scheme_region {
+ struct kobject kobj;
+ struct damon_addr_range ar;
+ unsigned int nr_accesses;
+ unsigned int age;
+ unsigned long sz_filter_passed;
+ struct list_head list;
+};
+
+static struct damon_sysfs_scheme_region *damon_sysfs_scheme_region_alloc(
+ struct damon_region *region)
+{
+ struct damon_sysfs_scheme_region *sysfs_region = kmalloc(
+ sizeof(*sysfs_region), GFP_KERNEL);
+
+ if (!sysfs_region)
+ return NULL;
+ sysfs_region->kobj = (struct kobject){};
+ sysfs_region->ar = region->ar;
+ sysfs_region->nr_accesses = region->nr_accesses_bp / 10000;
+ sysfs_region->age = region->age;
+ INIT_LIST_HEAD(&sysfs_region->list);
+ return sysfs_region;
+}
+
+static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->ar.start);
+}
+
+static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->ar.end);
+}
+
+static ssize_t nr_accesses_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%u\n", region->nr_accesses);
+}
+
+static ssize_t age_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%u\n", region->age);
+}
+
+static ssize_t sz_filter_passed_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->sz_filter_passed);
+}
+
+static void damon_sysfs_scheme_region_release(struct kobject *kobj)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ list_del(&region->list);
+ kfree(region);
+}
+
+static struct kobj_attribute damon_sysfs_scheme_region_start_attr =
+ __ATTR_RO_MODE(start, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_end_attr =
+ __ATTR_RO_MODE(end, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_nr_accesses_attr =
+ __ATTR_RO_MODE(nr_accesses, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_age_attr =
+ __ATTR_RO_MODE(age, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_sz_filter_passed_attr =
+ __ATTR_RO_MODE(sz_filter_passed, 0400);
+
+static struct attribute *damon_sysfs_scheme_region_attrs[] = {
+ &damon_sysfs_scheme_region_start_attr.attr,
+ &damon_sysfs_scheme_region_end_attr.attr,
+ &damon_sysfs_scheme_region_nr_accesses_attr.attr,
+ &damon_sysfs_scheme_region_age_attr.attr,
+ &damon_sysfs_scheme_region_sz_filter_passed_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_region);
+
+static const struct kobj_type damon_sysfs_scheme_region_ktype = {
+ .release = damon_sysfs_scheme_region_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_region_groups,
+};
+
+/*
+ * scheme regions directory
+ */
+
+struct damon_sysfs_scheme_regions {
+ struct kobject kobj;
+ struct list_head regions_list;
+ int nr_regions;
+ unsigned long total_bytes;
+};
+
+static struct damon_sysfs_scheme_regions *
+damon_sysfs_scheme_regions_alloc(void)
+{
+ struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
+ GFP_KERNEL);
+
+ if (!regions)
+ return NULL;
+
+ regions->kobj = (struct kobject){};
+ INIT_LIST_HEAD(&regions->regions_list);
+ regions->nr_regions = 0;
+ regions->total_bytes = 0;
+ return regions;
+}
+
+static ssize_t total_bytes_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_regions *regions = container_of(kobj,
+ struct damon_sysfs_scheme_regions, kobj);
+
+ return sysfs_emit(buf, "%lu\n", regions->total_bytes);
+}
+
+static void damon_sysfs_scheme_regions_rm_dirs(
+ struct damon_sysfs_scheme_regions *regions)
+{
+ struct damon_sysfs_scheme_region *r, *next;
+
+ list_for_each_entry_safe(r, next, &regions->regions_list, list) {
+ /* release function deletes it from the list */
+ kobject_put(&r->kobj);
+ regions->nr_regions--;
+ }
+}
+
+static void damon_sysfs_scheme_regions_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_scheme_regions, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_scheme_regions_total_bytes_attr =
+ __ATTR_RO_MODE(total_bytes, 0400);
+
+static struct attribute *damon_sysfs_scheme_regions_attrs[] = {
+ &damon_sysfs_scheme_regions_total_bytes_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_regions);
+
+static const struct kobj_type damon_sysfs_scheme_regions_ktype = {
+ .release = damon_sysfs_scheme_regions_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_regions_groups,
+};
+
+/*
+ * schemes/stats directory
+ */
+
+struct damon_sysfs_stats {
+ struct kobject kobj;
+ unsigned long nr_tried;
+ unsigned long sz_tried;
+ unsigned long nr_applied;
+ unsigned long sz_applied;
+ unsigned long sz_ops_filter_passed;
+ unsigned long qt_exceeds;
+};
+
+static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL);
+}
+
+static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->nr_tried);
+}
+
+static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->sz_tried);
+}
+
+static ssize_t nr_applied_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->nr_applied);
+}
+
+static ssize_t sz_applied_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->sz_applied);
+}
+
+static ssize_t sz_ops_filter_passed_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->sz_ops_filter_passed);
+}
+
+static ssize_t qt_exceeds_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->qt_exceeds);
+}
+
+static void damon_sysfs_stats_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_stats, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_stats_nr_tried_attr =
+ __ATTR_RO_MODE(nr_tried, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_tried_attr =
+ __ATTR_RO_MODE(sz_tried, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_nr_applied_attr =
+ __ATTR_RO_MODE(nr_applied, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_applied_attr =
+ __ATTR_RO_MODE(sz_applied, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_ops_filter_passed_attr =
+ __ATTR_RO_MODE(sz_ops_filter_passed, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr =
+ __ATTR_RO_MODE(qt_exceeds, 0400);
+
+static struct attribute *damon_sysfs_stats_attrs[] = {
+ &damon_sysfs_stats_nr_tried_attr.attr,
+ &damon_sysfs_stats_sz_tried_attr.attr,
+ &damon_sysfs_stats_nr_applied_attr.attr,
+ &damon_sysfs_stats_sz_applied_attr.attr,
+ &damon_sysfs_stats_sz_ops_filter_passed_attr.attr,
+ &damon_sysfs_stats_qt_exceeds_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_stats);
+
+static const struct kobj_type damon_sysfs_stats_ktype = {
+ .release = damon_sysfs_stats_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_stats_groups,
+};
+
+/*
+ * filter directory
+ */
+
+/*
+ * enum damos_sysfs_filter_handle_layer - Layers handling filters of a dir.
+ */
+enum damos_sysfs_filter_handle_layer {
+ DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE,
+ DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS,
+ DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH,
+};
+
+struct damon_sysfs_scheme_filter {
+ struct kobject kobj;
+ enum damos_sysfs_filter_handle_layer handle_layer;
+ enum damos_filter_type type;
+ bool matching;
+ bool allow;
+ char *memcg_path;
+ struct damon_addr_range addr_range;
+ struct damon_size_range sz_range;
+ int target_idx;
+};
+
+static struct damon_sysfs_scheme_filter *damon_sysfs_scheme_filter_alloc(
+ enum damos_sysfs_filter_handle_layer layer)
+{
+ struct damon_sysfs_scheme_filter *filter;
+
+ filter = kzalloc(sizeof(struct damon_sysfs_scheme_filter), GFP_KERNEL);
+ if (filter)
+ filter->handle_layer = layer;
+ return filter;
+}
+
+struct damos_sysfs_filter_type_name {
+ enum damos_filter_type type;
+ char *name;
+};
+
+static const struct damos_sysfs_filter_type_name
+damos_sysfs_filter_type_names[] = {
+ {
+ .type = DAMOS_FILTER_TYPE_ANON,
+ .name = "anon",
+ },
+ {
+ .type = DAMOS_FILTER_TYPE_ACTIVE,
+ .name = "active",
+ },
+ {
+ .type = DAMOS_FILTER_TYPE_MEMCG,
+ .name = "memcg",
+ },
+ {
+ .type = DAMOS_FILTER_TYPE_YOUNG,
+ .name = "young",
+ },
+ {
+ .type = DAMOS_FILTER_TYPE_HUGEPAGE_SIZE,
+ .name = "hugepage_size",
+ },
+ {
+ .type = DAMOS_FILTER_TYPE_UNMAPPED,
+ .name = "unmapped",
+ },
+ {
+ .type = DAMOS_FILTER_TYPE_ADDR,
+ .name = "addr",
+ },
+ {
+ .type = DAMOS_FILTER_TYPE_TARGET,
+ .name = "target",
+ },
+};
+
+static ssize_t type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damos_sysfs_filter_type_names); i++) {
+ const struct damos_sysfs_filter_type_name *type_name;
+
+ type_name = &damos_sysfs_filter_type_names[i];
+ if (type_name->type == filter->type)
+ return sysfs_emit(buf, "%s\n", type_name->name);
+ }
+ return -EINVAL;
+}
+
+static bool damos_sysfs_scheme_filter_valid_type(
+ enum damos_sysfs_filter_handle_layer layer,
+ enum damos_filter_type type)
+{
+ switch (layer) {
+ case DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH:
+ return true;
+ case DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE:
+ return !damos_filter_for_ops(type);
+ case DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS:
+ return damos_filter_for_ops(type);
+ default:
+ break;
+ }
+ return false;
+}
+
+static ssize_t type_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ ssize_t ret = -EINVAL;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damos_sysfs_filter_type_names); i++) {
+ const struct damos_sysfs_filter_type_name *type_name;
+
+ type_name = &damos_sysfs_filter_type_names[i];
+ if (sysfs_streq(buf, type_name->name)) {
+ if (!damos_sysfs_scheme_filter_valid_type(
+ filter->handle_layer,
+ type_name->type))
+ break;
+ filter->type = type_name->type;
+ ret = count;
+ break;
+ }
+ }
+ return ret;
+}
+
+static ssize_t matching_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%c\n", filter->matching ? 'Y' : 'N');
+}
+
+static ssize_t matching_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ bool matching;
+ int err = kstrtobool(buf, &matching);
+
+ if (err)
+ return err;
+
+ filter->matching = matching;
+ return count;
+}
+
+static ssize_t allow_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%c\n", filter->allow ? 'Y' : 'N');
+}
+
+static ssize_t allow_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ bool allow;
+ int err = kstrtobool(buf, &allow);
+
+ if (err)
+ return err;
+
+ filter->allow = allow;
+ return count;
+}
+
+static ssize_t memcg_path_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ filter->memcg_path ? filter->memcg_path : "");
+}
+
+static ssize_t memcg_path_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ char *path = kmalloc_array(size_add(count, 1), sizeof(*path),
+ GFP_KERNEL);
+
+ if (!path)
+ return -ENOMEM;
+
+ strscpy(path, buf, count + 1);
+ kfree(filter->memcg_path);
+ filter->memcg_path = path;
+ return count;
+}
+
+static ssize_t addr_start_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%lu\n", filter->addr_range.start);
+}
+
+static ssize_t addr_start_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int err = kstrtoul(buf, 0, &filter->addr_range.start);
+
+ return err ? err : count;
+}
+
+static ssize_t addr_end_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%lu\n", filter->addr_range.end);
+}
+
+static ssize_t addr_end_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int err = kstrtoul(buf, 0, &filter->addr_range.end);
+
+ return err ? err : count;
+}
+
+static ssize_t min_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%lu\n", filter->sz_range.min);
+}
+
+static ssize_t min_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int err = kstrtoul(buf, 0, &filter->sz_range.min);
+
+ return err ? err : count;
+}
+
+static ssize_t max_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%lu\n", filter->sz_range.max);
+}
+
+static ssize_t max_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int err = kstrtoul(buf, 0, &filter->sz_range.max);
+
+ return err ? err : count;
+}
+
+static ssize_t damon_target_idx_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%d\n", filter->target_idx);
+}
+
+static ssize_t damon_target_idx_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int err = kstrtoint(buf, 0, &filter->target_idx);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_scheme_filter_release(struct kobject *kobj)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ kfree(filter->memcg_path);
+ kfree(filter);
+}
+
+static struct kobj_attribute damon_sysfs_scheme_filter_type_attr =
+ __ATTR_RW_MODE(type, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_matching_attr =
+ __ATTR_RW_MODE(matching, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_allow_attr =
+ __ATTR_RW_MODE(allow, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_memcg_path_attr =
+ __ATTR_RW_MODE(memcg_path, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_addr_start_attr =
+ __ATTR_RW_MODE(addr_start, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_addr_end_attr =
+ __ATTR_RW_MODE(addr_end, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_min_attr =
+ __ATTR_RW_MODE(min, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_max_attr =
+ __ATTR_RW_MODE(max, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_damon_target_idx_attr =
+ __ATTR_RW_MODE(damon_target_idx, 0600);
+
+static struct attribute *damon_sysfs_scheme_filter_attrs[] = {
+ &damon_sysfs_scheme_filter_type_attr.attr,
+ &damon_sysfs_scheme_filter_matching_attr.attr,
+ &damon_sysfs_scheme_filter_allow_attr.attr,
+ &damon_sysfs_scheme_filter_memcg_path_attr.attr,
+ &damon_sysfs_scheme_filter_addr_start_attr.attr,
+ &damon_sysfs_scheme_filter_addr_end_attr.attr,
+ &damon_sysfs_scheme_filter_min_attr.attr,
+ &damon_sysfs_scheme_filter_max_attr.attr,
+ &damon_sysfs_scheme_filter_damon_target_idx_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_filter);
+
+static const struct kobj_type damon_sysfs_scheme_filter_ktype = {
+ .release = damon_sysfs_scheme_filter_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_filter_groups,
+};
+
+/*
+ * filters directory
+ */
+
+struct damon_sysfs_scheme_filters {
+ struct kobject kobj;
+ enum damos_sysfs_filter_handle_layer handle_layer;
+ struct damon_sysfs_scheme_filter **filters_arr;
+ int nr;
+};
+
+static struct damon_sysfs_scheme_filters *
+damon_sysfs_scheme_filters_alloc(enum damos_sysfs_filter_handle_layer layer)
+{
+ struct damon_sysfs_scheme_filters *filters;
+
+ filters = kzalloc(sizeof(struct damon_sysfs_scheme_filters), GFP_KERNEL);
+ if (filters)
+ filters->handle_layer = layer;
+ return filters;
+}
+
+static void damon_sysfs_scheme_filters_rm_dirs(
+ struct damon_sysfs_scheme_filters *filters)
+{
+ struct damon_sysfs_scheme_filter **filters_arr = filters->filters_arr;
+ int i;
+
+ for (i = 0; i < filters->nr; i++)
+ kobject_put(&filters_arr[i]->kobj);
+ filters->nr = 0;
+ kfree(filters_arr);
+ filters->filters_arr = NULL;
+}
+
+static int damon_sysfs_scheme_filters_add_dirs(
+ struct damon_sysfs_scheme_filters *filters, int nr_filters)
+{
+ struct damon_sysfs_scheme_filter **filters_arr, *filter;
+ int err, i;
+
+ damon_sysfs_scheme_filters_rm_dirs(filters);
+ if (!nr_filters)
+ return 0;
+
+ filters_arr = kmalloc_array(nr_filters, sizeof(*filters_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!filters_arr)
+ return -ENOMEM;
+ filters->filters_arr = filters_arr;
+
+ for (i = 0; i < nr_filters; i++) {
+ filter = damon_sysfs_scheme_filter_alloc(
+ filters->handle_layer);
+ if (!filter) {
+ damon_sysfs_scheme_filters_rm_dirs(filters);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&filter->kobj,
+ &damon_sysfs_scheme_filter_ktype,
+ &filters->kobj, "%d", i);
+ if (err) {
+ kobject_put(&filter->kobj);
+ damon_sysfs_scheme_filters_rm_dirs(filters);
+ return err;
+ }
+
+ filters_arr[i] = filter;
+ filters->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_filters_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filters *filters = container_of(kobj,
+ struct damon_sysfs_scheme_filters, kobj);
+
+ return sysfs_emit(buf, "%d\n", filters->nr);
+}
+
+static ssize_t nr_filters_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filters *filters;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ filters = container_of(kobj, struct damon_sysfs_scheme_filters, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_scheme_filters_add_dirs(filters, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_scheme_filters_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_scheme_filters, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_scheme_filters_nr_attr =
+ __ATTR_RW_MODE(nr_filters, 0600);
+
+static struct attribute *damon_sysfs_scheme_filters_attrs[] = {
+ &damon_sysfs_scheme_filters_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_filters);
+
+static const struct kobj_type damon_sysfs_scheme_filters_ktype = {
+ .release = damon_sysfs_scheme_filters_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_filters_groups,
+};
+
+/*
+ * watermarks directory
+ */
+
+struct damon_sysfs_watermarks {
+ struct kobject kobj;
+ enum damos_wmark_metric metric;
+ unsigned long interval_us;
+ unsigned long high;
+ unsigned long mid;
+ unsigned long low;
+};
+
+static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc(
+ enum damos_wmark_metric metric, unsigned long interval_us,
+ unsigned long high, unsigned long mid, unsigned long low)
+{
+ struct damon_sysfs_watermarks *watermarks = kmalloc(
+ sizeof(*watermarks), GFP_KERNEL);
+
+ if (!watermarks)
+ return NULL;
+ watermarks->kobj = (struct kobject){};
+ watermarks->metric = metric;
+ watermarks->interval_us = interval_us;
+ watermarks->high = high;
+ watermarks->mid = mid;
+ watermarks->low = low;
+ return watermarks;
+}
+
+struct damos_sysfs_wmark_metric_name {
+ enum damos_wmark_metric metric;
+ char *name;
+};
+
+static const struct damos_sysfs_wmark_metric_name
+damos_sysfs_wmark_metric_names[] = {
+ {
+ .metric = DAMOS_WMARK_NONE,
+ .name = "none",
+ },
+ {
+ .metric = DAMOS_WMARK_FREE_MEM_RATE,
+ .name = "free_mem_rate",
+ },
+};
+
+static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damos_sysfs_wmark_metric_names); i++) {
+ const struct damos_sysfs_wmark_metric_name *metric_name;
+
+ metric_name = &damos_sysfs_wmark_metric_names[i];
+ if (metric_name->metric == watermarks->metric)
+ return sysfs_emit(buf, "%s\n", metric_name->name);
+ }
+ return -EINVAL;
+}
+
+static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damos_sysfs_wmark_metric_names); i++) {
+ const struct damos_sysfs_wmark_metric_name *metric_name;
+
+ metric_name = &damos_sysfs_wmark_metric_names[i];
+ if (sysfs_streq(buf, metric_name->name)) {
+ watermarks->metric = metric_name->metric;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+
+static ssize_t interval_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->interval_us);
+}
+
+static ssize_t interval_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->interval_us);
+
+ return err ? err : count;
+}
+
+static ssize_t high_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->high);
+}
+
+static ssize_t high_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->high);
+
+ return err ? err : count;
+}
+
+static ssize_t mid_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->mid);
+}
+
+static ssize_t mid_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->mid);
+
+ return err ? err : count;
+}
+
+static ssize_t low_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->low);
+}
+
+static ssize_t low_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->low);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_watermarks_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_watermarks_metric_attr =
+ __ATTR_RW_MODE(metric, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr =
+ __ATTR_RW_MODE(interval_us, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_high_attr =
+ __ATTR_RW_MODE(high, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_mid_attr =
+ __ATTR_RW_MODE(mid, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_low_attr =
+ __ATTR_RW_MODE(low, 0600);
+
+static struct attribute *damon_sysfs_watermarks_attrs[] = {
+ &damon_sysfs_watermarks_metric_attr.attr,
+ &damon_sysfs_watermarks_interval_us_attr.attr,
+ &damon_sysfs_watermarks_high_attr.attr,
+ &damon_sysfs_watermarks_mid_attr.attr,
+ &damon_sysfs_watermarks_low_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_watermarks);
+
+static const struct kobj_type damon_sysfs_watermarks_ktype = {
+ .release = damon_sysfs_watermarks_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_watermarks_groups,
+};
+
+/*
+ * quota goal directory
+ */
+
+struct damos_sysfs_quota_goal {
+ struct kobject kobj;
+ enum damos_quota_goal_metric metric;
+ unsigned long target_value;
+ unsigned long current_value;
+ int nid;
+ char *path;
+};
+
+static struct damos_sysfs_quota_goal *damos_sysfs_quota_goal_alloc(void)
+{
+ return kzalloc(sizeof(struct damos_sysfs_quota_goal), GFP_KERNEL);
+}
+
+struct damos_sysfs_qgoal_metric_name {
+ enum damos_quota_goal_metric metric;
+ char *name;
+};
+
+static
+struct damos_sysfs_qgoal_metric_name damos_sysfs_qgoal_metric_names[] = {
+ {
+ .metric = DAMOS_QUOTA_USER_INPUT,
+ .name = "user_input",
+ },
+ {
+ .metric = DAMOS_QUOTA_SOME_MEM_PSI_US,
+ .name = "some_mem_psi_us",
+ },
+ {
+ .metric = DAMOS_QUOTA_NODE_MEM_USED_BP,
+ .name = "node_mem_used_bp",
+ },
+ {
+ .metric = DAMOS_QUOTA_NODE_MEM_FREE_BP,
+ .name = "node_mem_free_bp",
+ },
+ {
+ .metric = DAMOS_QUOTA_NODE_MEMCG_USED_BP,
+ .name = "node_memcg_used_bp",
+ },
+ {
+ .metric = DAMOS_QUOTA_NODE_MEMCG_FREE_BP,
+ .name = "node_memcg_free_bp",
+ },
+};
+
+static ssize_t target_metric_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj,
+ struct damos_sysfs_quota_goal, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damos_sysfs_qgoal_metric_names); i++) {
+ struct damos_sysfs_qgoal_metric_name *metric_name;
+
+ metric_name = &damos_sysfs_qgoal_metric_names[i];
+ if (metric_name->metric == goal->metric)
+ return sysfs_emit(buf, "%s\n", metric_name->name);
+ }
+ return -EINVAL;
+}
+
+static ssize_t target_metric_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj,
+ struct damos_sysfs_quota_goal, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damos_sysfs_qgoal_metric_names); i++) {
+ struct damos_sysfs_qgoal_metric_name *metric_name;
+
+ metric_name = &damos_sysfs_qgoal_metric_names[i];
+ if (sysfs_streq(buf, metric_name->name)) {
+ goal->metric = metric_name->metric;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+
+static ssize_t target_value_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj, struct
+ damos_sysfs_quota_goal, kobj);
+
+ return sysfs_emit(buf, "%lu\n", goal->target_value);
+}
+
+static ssize_t target_value_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj, struct
+ damos_sysfs_quota_goal, kobj);
+ int err = kstrtoul(buf, 0, &goal->target_value);
+
+ return err ? err : count;
+}
+
+static ssize_t current_value_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj, struct
+ damos_sysfs_quota_goal, kobj);
+
+ return sysfs_emit(buf, "%lu\n", goal->current_value);
+}
+
+static ssize_t current_value_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj, struct
+ damos_sysfs_quota_goal, kobj);
+ int err = kstrtoul(buf, 0, &goal->current_value);
+
+ /* feed callback should check existence of this file and read value */
+ return err ? err : count;
+}
+
+static ssize_t nid_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj, struct
+ damos_sysfs_quota_goal, kobj);
+
+
+ return sysfs_emit(buf, "%d\n", goal->nid);
+}
+
+static ssize_t nid_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj, struct
+ damos_sysfs_quota_goal, kobj);
+ int err = kstrtoint(buf, 0, &goal->nid);
+
+ /* feed callback should check existence of this file and read value */
+ return err ? err : count;
+}
+
+static ssize_t path_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj,
+ struct damos_sysfs_quota_goal, kobj);
+
+ return sysfs_emit(buf, "%s\n", goal->path ? goal->path : "");
+}
+
+static ssize_t path_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj,
+ struct damos_sysfs_quota_goal, kobj);
+ char *path = kmalloc_array(size_add(count, 1), sizeof(*path),
+ GFP_KERNEL);
+
+ if (!path)
+ return -ENOMEM;
+
+ strscpy(path, buf, count + 1);
+ kfree(goal->path);
+ goal->path = path;
+ return count;
+}
+
+static void damos_sysfs_quota_goal_release(struct kobject *kobj)
+{
+ struct damos_sysfs_quota_goal *goal = container_of(kobj,
+ struct damos_sysfs_quota_goal, kobj);
+
+ kfree(goal->path);
+ kfree(goal);
+}
+
+static struct kobj_attribute damos_sysfs_quota_goal_target_metric_attr =
+ __ATTR_RW_MODE(target_metric, 0600);
+
+static struct kobj_attribute damos_sysfs_quota_goal_target_value_attr =
+ __ATTR_RW_MODE(target_value, 0600);
+
+static struct kobj_attribute damos_sysfs_quota_goal_current_value_attr =
+ __ATTR_RW_MODE(current_value, 0600);
+
+static struct kobj_attribute damos_sysfs_quota_goal_nid_attr =
+ __ATTR_RW_MODE(nid, 0600);
+
+static struct kobj_attribute damos_sysfs_quota_goal_path_attr =
+ __ATTR_RW_MODE(path, 0600);
+
+static struct attribute *damos_sysfs_quota_goal_attrs[] = {
+ &damos_sysfs_quota_goal_target_metric_attr.attr,
+ &damos_sysfs_quota_goal_target_value_attr.attr,
+ &damos_sysfs_quota_goal_current_value_attr.attr,
+ &damos_sysfs_quota_goal_nid_attr.attr,
+ &damos_sysfs_quota_goal_path_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damos_sysfs_quota_goal);
+
+static const struct kobj_type damos_sysfs_quota_goal_ktype = {
+ .release = damos_sysfs_quota_goal_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damos_sysfs_quota_goal_groups,
+};
+
+/*
+ * quota goals directory
+ */
+
+struct damos_sysfs_quota_goals {
+ struct kobject kobj;
+ struct damos_sysfs_quota_goal **goals_arr; /* counted by nr */
+ int nr;
+};
+
+static struct damos_sysfs_quota_goals *damos_sysfs_quota_goals_alloc(void)
+{
+ return kzalloc(sizeof(struct damos_sysfs_quota_goals), GFP_KERNEL);
+}
+
+static void damos_sysfs_quota_goals_rm_dirs(
+ struct damos_sysfs_quota_goals *goals)
+{
+ struct damos_sysfs_quota_goal **goals_arr = goals->goals_arr;
+ int i;
+
+ for (i = 0; i < goals->nr; i++)
+ kobject_put(&goals_arr[i]->kobj);
+ goals->nr = 0;
+ kfree(goals_arr);
+ goals->goals_arr = NULL;
+}
+
+static int damos_sysfs_quota_goals_add_dirs(
+ struct damos_sysfs_quota_goals *goals, int nr_goals)
+{
+ struct damos_sysfs_quota_goal **goals_arr, *goal;
+ int err, i;
+
+ damos_sysfs_quota_goals_rm_dirs(goals);
+ if (!nr_goals)
+ return 0;
+
+ goals_arr = kmalloc_array(nr_goals, sizeof(*goals_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!goals_arr)
+ return -ENOMEM;
+ goals->goals_arr = goals_arr;
+
+ for (i = 0; i < nr_goals; i++) {
+ goal = damos_sysfs_quota_goal_alloc();
+ if (!goal) {
+ damos_sysfs_quota_goals_rm_dirs(goals);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&goal->kobj,
+ &damos_sysfs_quota_goal_ktype, &goals->kobj,
+ "%d", i);
+ if (err) {
+ kobject_put(&goal->kobj);
+ damos_sysfs_quota_goals_rm_dirs(goals);
+ return err;
+ }
+
+ goals_arr[i] = goal;
+ goals->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_goals_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_quota_goals *goals = container_of(kobj,
+ struct damos_sysfs_quota_goals, kobj);
+
+ return sysfs_emit(buf, "%d\n", goals->nr);
+}
+
+static ssize_t nr_goals_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_quota_goals *goals;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ goals = container_of(kobj, struct damos_sysfs_quota_goals, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damos_sysfs_quota_goals_add_dirs(goals, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damos_sysfs_quota_goals_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damos_sysfs_quota_goals, kobj));
+}
+
+static struct kobj_attribute damos_sysfs_quota_goals_nr_attr =
+ __ATTR_RW_MODE(nr_goals, 0600);
+
+static struct attribute *damos_sysfs_quota_goals_attrs[] = {
+ &damos_sysfs_quota_goals_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damos_sysfs_quota_goals);
+
+static const struct kobj_type damos_sysfs_quota_goals_ktype = {
+ .release = damos_sysfs_quota_goals_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damos_sysfs_quota_goals_groups,
+};
+
+/*
+ * scheme/weights directory
+ */
+
+struct damon_sysfs_weights {
+ struct kobject kobj;
+ unsigned int sz;
+ unsigned int nr_accesses;
+ unsigned int age;
+};
+
+static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz,
+ unsigned int nr_accesses, unsigned int age)
+{
+ struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights),
+ GFP_KERNEL);
+
+ if (!weights)
+ return NULL;
+ weights->kobj = (struct kobject){};
+ weights->sz = sz;
+ weights->nr_accesses = nr_accesses;
+ weights->age = age;
+ return weights;
+}
+
+static ssize_t sz_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->sz);
+}
+
+static ssize_t sz_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->sz);
+
+ return err ? err : count;
+}
+
+static ssize_t nr_accesses_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->nr_accesses);
+}
+
+static ssize_t nr_accesses_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->nr_accesses);
+
+ return err ? err : count;
+}
+
+static ssize_t age_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->age);
+}
+
+static ssize_t age_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->age);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_weights_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_weights, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_weights_sz_attr =
+ __ATTR_RW_MODE(sz_permil, 0600);
+
+static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr =
+ __ATTR_RW_MODE(nr_accesses_permil, 0600);
+
+static struct kobj_attribute damon_sysfs_weights_age_attr =
+ __ATTR_RW_MODE(age_permil, 0600);
+
+static struct attribute *damon_sysfs_weights_attrs[] = {
+ &damon_sysfs_weights_sz_attr.attr,
+ &damon_sysfs_weights_nr_accesses_attr.attr,
+ &damon_sysfs_weights_age_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_weights);
+
+static const struct kobj_type damon_sysfs_weights_ktype = {
+ .release = damon_sysfs_weights_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_weights_groups,
+};
+
+/*
+ * quotas directory
+ */
+
+struct damon_sysfs_quotas {
+ struct kobject kobj;
+ struct damon_sysfs_weights *weights;
+ struct damos_sysfs_quota_goals *goals;
+ unsigned long ms;
+ unsigned long sz;
+ unsigned long reset_interval_ms;
+ unsigned long effective_sz; /* Effective size quota in bytes */
+};
+
+static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL);
+}
+
+static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas)
+{
+ struct damon_sysfs_weights *weights;
+ struct damos_sysfs_quota_goals *goals;
+ int err;
+
+ weights = damon_sysfs_weights_alloc(0, 0, 0);
+ if (!weights)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype,
+ &quotas->kobj, "weights");
+ if (err) {
+ kobject_put(&weights->kobj);
+ return err;
+ }
+ quotas->weights = weights;
+
+ goals = damos_sysfs_quota_goals_alloc();
+ if (!goals) {
+ kobject_put(&weights->kobj);
+ return -ENOMEM;
+ }
+ err = kobject_init_and_add(&goals->kobj,
+ &damos_sysfs_quota_goals_ktype, &quotas->kobj,
+ "goals");
+ if (err) {
+ kobject_put(&weights->kobj);
+ kobject_put(&goals->kobj);
+ } else {
+ quotas->goals = goals;
+ }
+
+ return err;
+}
+
+static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas)
+{
+ kobject_put(&quotas->weights->kobj);
+ damos_sysfs_quota_goals_rm_dirs(quotas->goals);
+ kobject_put(&quotas->goals->kobj);
+}
+
+static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->ms);
+}
+
+static ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->ms);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->sz);
+}
+
+static ssize_t bytes_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->sz);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t reset_interval_ms_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms);
+}
+
+static ssize_t reset_interval_ms_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->reset_interval_ms);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t effective_bytes_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->effective_sz);
+}
+
+static void damon_sysfs_quotas_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_quotas, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_quotas_ms_attr =
+ __ATTR_RW_MODE(ms, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_sz_attr =
+ __ATTR_RW_MODE(bytes, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr =
+ __ATTR_RW_MODE(reset_interval_ms, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_effective_bytes_attr =
+ __ATTR_RO_MODE(effective_bytes, 0400);
+
+static struct attribute *damon_sysfs_quotas_attrs[] = {
+ &damon_sysfs_quotas_ms_attr.attr,
+ &damon_sysfs_quotas_sz_attr.attr,
+ &damon_sysfs_quotas_reset_interval_ms_attr.attr,
+ &damon_sysfs_quotas_effective_bytes_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_quotas);
+
+static const struct kobj_type damon_sysfs_quotas_ktype = {
+ .release = damon_sysfs_quotas_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_quotas_groups,
+};
+
+/*
+ * access_pattern directory
+ */
+
+struct damon_sysfs_access_pattern {
+ struct kobject kobj;
+ struct damon_sysfs_ul_range *sz;
+ struct damon_sysfs_ul_range *nr_accesses;
+ struct damon_sysfs_ul_range *age;
+};
+
+static
+struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void)
+{
+ struct damon_sysfs_access_pattern *access_pattern =
+ kmalloc(sizeof(*access_pattern), GFP_KERNEL);
+
+ if (!access_pattern)
+ return NULL;
+ access_pattern->kobj = (struct kobject){};
+ return access_pattern;
+}
+
+static int damon_sysfs_access_pattern_add_range_dir(
+ struct damon_sysfs_access_pattern *access_pattern,
+ struct damon_sysfs_ul_range **range_dir_ptr,
+ char *name)
+{
+ struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0);
+ int err;
+
+ if (!range)
+ return -ENOMEM;
+ err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype,
+ &access_pattern->kobj, "%s", name);
+ if (err)
+ kobject_put(&range->kobj);
+ else
+ *range_dir_ptr = range;
+ return err;
+}
+
+static int damon_sysfs_access_pattern_add_dirs(
+ struct damon_sysfs_access_pattern *access_pattern)
+{
+ int err;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->sz, "sz");
+ if (err)
+ goto put_sz_out;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->nr_accesses, "nr_accesses");
+ if (err)
+ goto put_nr_accesses_sz_out;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->age, "age");
+ if (err)
+ goto put_age_nr_accesses_sz_out;
+ return 0;
+
+put_age_nr_accesses_sz_out:
+ kobject_put(&access_pattern->age->kobj);
+ access_pattern->age = NULL;
+put_nr_accesses_sz_out:
+ kobject_put(&access_pattern->nr_accesses->kobj);
+ access_pattern->nr_accesses = NULL;
+put_sz_out:
+ kobject_put(&access_pattern->sz->kobj);
+ access_pattern->sz = NULL;
+ return err;
+}
+
+static void damon_sysfs_access_pattern_rm_dirs(
+ struct damon_sysfs_access_pattern *access_pattern)
+{
+ kobject_put(&access_pattern->sz->kobj);
+ kobject_put(&access_pattern->nr_accesses->kobj);
+ kobject_put(&access_pattern->age->kobj);
+}
+
+static void damon_sysfs_access_pattern_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj));
+}
+
+static struct attribute *damon_sysfs_access_pattern_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_access_pattern);
+
+static const struct kobj_type damon_sysfs_access_pattern_ktype = {
+ .release = damon_sysfs_access_pattern_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_access_pattern_groups,
+};
+
+/*
+ * dest (action destination) directory
+ */
+
+struct damos_sysfs_dest {
+ struct kobject kobj;
+ unsigned int id;
+ unsigned int weight;
+};
+
+static struct damos_sysfs_dest *damos_sysfs_dest_alloc(void)
+{
+ return kzalloc(sizeof(struct damos_sysfs_dest), GFP_KERNEL);
+}
+
+static ssize_t id_show(
+ struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_dest *dest = container_of(kobj,
+ struct damos_sysfs_dest, kobj);
+
+ return sysfs_emit(buf, "%u\n", dest->id);
+}
+
+static ssize_t id_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_dest *dest = container_of(kobj,
+ struct damos_sysfs_dest, kobj);
+ int err = kstrtouint(buf, 0, &dest->id);
+
+ return err ? err : count;
+}
+
+static ssize_t weight_show(
+ struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_dest *dest = container_of(kobj,
+ struct damos_sysfs_dest, kobj);
+
+ return sysfs_emit(buf, "%u\n", dest->weight);
+}
+
+static ssize_t weight_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_dest *dest = container_of(kobj,
+ struct damos_sysfs_dest, kobj);
+ int err = kstrtouint(buf, 0, &dest->weight);
+
+ return err ? err : count;
+}
+
+static void damos_sysfs_dest_release(struct kobject *kobj)
+{
+ struct damos_sysfs_dest *dest = container_of(kobj,
+ struct damos_sysfs_dest, kobj);
+ kfree(dest);
+}
+
+static struct kobj_attribute damos_sysfs_dest_id_attr =
+ __ATTR_RW_MODE(id, 0600);
+
+static struct kobj_attribute damos_sysfs_dest_weight_attr =
+ __ATTR_RW_MODE(weight, 0600);
+
+static struct attribute *damos_sysfs_dest_attrs[] = {
+ &damos_sysfs_dest_id_attr.attr,
+ &damos_sysfs_dest_weight_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damos_sysfs_dest);
+
+static const struct kobj_type damos_sysfs_dest_ktype = {
+ .release = damos_sysfs_dest_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damos_sysfs_dest_groups,
+};
+
+/*
+ * dests (action destinations) directory
+ */
+
+struct damos_sysfs_dests {
+ struct kobject kobj;
+ struct damos_sysfs_dest **dests_arr;
+ int nr;
+};
+
+static struct damos_sysfs_dests *
+damos_sysfs_dests_alloc(void)
+{
+ return kzalloc(sizeof(struct damos_sysfs_dests), GFP_KERNEL);
+}
+
+static void damos_sysfs_dests_rm_dirs(
+ struct damos_sysfs_dests *dests)
+{
+ struct damos_sysfs_dest **dests_arr = dests->dests_arr;
+ int i;
+
+ for (i = 0; i < dests->nr; i++)
+ kobject_put(&dests_arr[i]->kobj);
+ dests->nr = 0;
+ kfree(dests_arr);
+ dests->dests_arr = NULL;
+}
+
+static int damos_sysfs_dests_add_dirs(
+ struct damos_sysfs_dests *dests, int nr_dests)
+{
+ struct damos_sysfs_dest **dests_arr, *dest;
+ int err, i;
+
+ damos_sysfs_dests_rm_dirs(dests);
+ if (!nr_dests)
+ return 0;
+
+ dests_arr = kmalloc_array(nr_dests, sizeof(*dests_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!dests_arr)
+ return -ENOMEM;
+ dests->dests_arr = dests_arr;
+
+ for (i = 0; i < nr_dests; i++) {
+ dest = damos_sysfs_dest_alloc();
+ if (!dest) {
+ damos_sysfs_dests_rm_dirs(dests);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&dest->kobj,
+ &damos_sysfs_dest_ktype,
+ &dests->kobj, "%d", i);
+ if (err) {
+ kobject_put(&dest->kobj);
+ damos_sysfs_dests_rm_dirs(dests);
+ return err;
+ }
+
+ dests_arr[i] = dest;
+ dests->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_dests_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damos_sysfs_dests *dests = container_of(kobj,
+ struct damos_sysfs_dests, kobj);
+
+ return sysfs_emit(buf, "%d\n", dests->nr);
+}
+
+static ssize_t nr_dests_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damos_sysfs_dests *dests;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ dests = container_of(kobj, struct damos_sysfs_dests, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damos_sysfs_dests_add_dirs(dests, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damos_sysfs_dests_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damos_sysfs_dests, kobj));
+}
+
+static struct kobj_attribute damos_sysfs_dests_nr_attr =
+ __ATTR_RW_MODE(nr_dests, 0600);
+
+static struct attribute *damos_sysfs_dests_attrs[] = {
+ &damos_sysfs_dests_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damos_sysfs_dests);
+
+static const struct kobj_type damos_sysfs_dests_ktype = {
+ .release = damos_sysfs_dests_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damos_sysfs_dests_groups,
+};
+
+/*
+ * scheme directory
+ */
+
+struct damon_sysfs_scheme {
+ struct kobject kobj;
+ enum damos_action action;
+ struct damon_sysfs_access_pattern *access_pattern;
+ unsigned long apply_interval_us;
+ struct damon_sysfs_quotas *quotas;
+ struct damon_sysfs_watermarks *watermarks;
+ struct damon_sysfs_scheme_filters *core_filters;
+ struct damon_sysfs_scheme_filters *ops_filters;
+ struct damon_sysfs_scheme_filters *filters;
+ struct damon_sysfs_stats *stats;
+ struct damon_sysfs_scheme_regions *tried_regions;
+ int target_nid;
+ struct damos_sysfs_dests *dests;
+};
+
+struct damos_sysfs_action_name {
+ enum damos_action action;
+ char *name;
+};
+
+static struct damos_sysfs_action_name damos_sysfs_action_names[] = {
+ {
+ .action = DAMOS_WILLNEED,
+ .name = "willneed",
+ },
+ {
+ .action = DAMOS_COLD,
+ .name = "cold",
+ },
+ {
+ .action = DAMOS_PAGEOUT,
+ .name = "pageout",
+ },
+ {
+ .action = DAMOS_HUGEPAGE,
+ .name = "hugepage",
+ },
+ {
+ .action = DAMOS_NOHUGEPAGE,
+ .name = "nohugepage",
+ },
+ {
+ .action = DAMOS_LRU_PRIO,
+ .name = "lru_prio",
+ },
+ {
+ .action = DAMOS_LRU_DEPRIO,
+ .name = "lru_deprio",
+ },
+ {
+ .action = DAMOS_MIGRATE_HOT,
+ .name = "migrate_hot",
+ },
+ {
+ .action = DAMOS_MIGRATE_COLD,
+ .name = "migrate_cold",
+ },
+ {
+ .action = DAMOS_STAT,
+ .name = "stat",
+ },
+};
+
+static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc(
+ enum damos_action action, unsigned long apply_interval_us)
+{
+ struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme),
+ GFP_KERNEL);
+
+ if (!scheme)
+ return NULL;
+ scheme->kobj = (struct kobject){};
+ scheme->action = action;
+ scheme->apply_interval_us = apply_interval_us;
+ scheme->target_nid = NUMA_NO_NODE;
+ return scheme;
+}
+
+static int damon_sysfs_scheme_set_access_pattern(
+ struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_access_pattern *access_pattern;
+ int err;
+
+ access_pattern = damon_sysfs_access_pattern_alloc();
+ if (!access_pattern)
+ return -ENOMEM;
+ err = kobject_init_and_add(&access_pattern->kobj,
+ &damon_sysfs_access_pattern_ktype, &scheme->kobj,
+ "access_pattern");
+ if (err)
+ goto out;
+ err = damon_sysfs_access_pattern_add_dirs(access_pattern);
+ if (err)
+ goto out;
+ scheme->access_pattern = access_pattern;
+ return 0;
+
+out:
+ kobject_put(&access_pattern->kobj);
+ return err;
+}
+
+static int damos_sysfs_set_dests(struct damon_sysfs_scheme *scheme)
+{
+ struct damos_sysfs_dests *dests = damos_sysfs_dests_alloc();
+ int err;
+
+ if (!dests)
+ return -ENOMEM;
+ err = kobject_init_and_add(&dests->kobj, &damos_sysfs_dests_ktype,
+ &scheme->kobj, "dests");
+ if (err)
+ kobject_put(&dests->kobj);
+ else
+ scheme->dests = dests;
+ return err;
+}
+
+static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc();
+ int err;
+
+ if (!quotas)
+ return -ENOMEM;
+ err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype,
+ &scheme->kobj, "quotas");
+ if (err)
+ goto out;
+ err = damon_sysfs_quotas_add_dirs(quotas);
+ if (err)
+ goto out;
+ scheme->quotas = quotas;
+ return 0;
+
+out:
+ kobject_put(&quotas->kobj);
+ return err;
+}
+
+static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_watermarks *watermarks =
+ damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0);
+ int err;
+
+ if (!watermarks)
+ return -ENOMEM;
+ err = kobject_init_and_add(&watermarks->kobj,
+ &damon_sysfs_watermarks_ktype, &scheme->kobj,
+ "watermarks");
+ if (err)
+ kobject_put(&watermarks->kobj);
+ else
+ scheme->watermarks = watermarks;
+ return err;
+}
+
+static int damon_sysfs_scheme_set_filters(struct damon_sysfs_scheme *scheme,
+ enum damos_sysfs_filter_handle_layer layer, const char *name,
+ struct damon_sysfs_scheme_filters **filters_ptr)
+{
+ struct damon_sysfs_scheme_filters *filters =
+ damon_sysfs_scheme_filters_alloc(layer);
+ int err;
+
+ if (!filters)
+ return -ENOMEM;
+ err = kobject_init_and_add(&filters->kobj,
+ &damon_sysfs_scheme_filters_ktype, &scheme->kobj,
+ "%s", name);
+ if (err)
+ kobject_put(&filters->kobj);
+ else
+ *filters_ptr = filters;
+ return err;
+}
+
+static int damos_sysfs_set_filter_dirs(struct damon_sysfs_scheme *scheme)
+{
+ int err;
+
+ err = damon_sysfs_scheme_set_filters(scheme,
+ DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH, "filters",
+ &scheme->filters);
+ if (err)
+ return err;
+ err = damon_sysfs_scheme_set_filters(scheme,
+ DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE, "core_filters",
+ &scheme->core_filters);
+ if (err)
+ goto put_filters_out;
+ err = damon_sysfs_scheme_set_filters(scheme,
+ DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS, "ops_filters",
+ &scheme->ops_filters);
+ if (err)
+ goto put_core_filters_out;
+ return 0;
+
+put_core_filters_out:
+ kobject_put(&scheme->core_filters->kobj);
+ scheme->core_filters = NULL;
+put_filters_out:
+ kobject_put(&scheme->filters->kobj);
+ scheme->filters = NULL;
+ return err;
+}
+
+static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc();
+ int err;
+
+ if (!stats)
+ return -ENOMEM;
+ err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype,
+ &scheme->kobj, "stats");
+ if (err)
+ kobject_put(&stats->kobj);
+ else
+ scheme->stats = stats;
+ return err;
+}
+
+static int damon_sysfs_scheme_set_tried_regions(
+ struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_scheme_regions *tried_regions =
+ damon_sysfs_scheme_regions_alloc();
+ int err;
+
+ if (!tried_regions)
+ return -ENOMEM;
+ err = kobject_init_and_add(&tried_regions->kobj,
+ &damon_sysfs_scheme_regions_ktype, &scheme->kobj,
+ "tried_regions");
+ if (err)
+ kobject_put(&tried_regions->kobj);
+ else
+ scheme->tried_regions = tried_regions;
+ return err;
+}
+
+static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme)
+{
+ int err;
+
+ err = damon_sysfs_scheme_set_access_pattern(scheme);
+ if (err)
+ return err;
+ err = damos_sysfs_set_dests(scheme);
+ if (err)
+ goto put_access_pattern_out;
+ err = damon_sysfs_scheme_set_quotas(scheme);
+ if (err)
+ goto put_dests_out;
+ err = damon_sysfs_scheme_set_watermarks(scheme);
+ if (err)
+ goto put_quotas_access_pattern_out;
+ err = damos_sysfs_set_filter_dirs(scheme);
+ if (err)
+ goto put_watermarks_quotas_access_pattern_out;
+ err = damon_sysfs_scheme_set_stats(scheme);
+ if (err)
+ goto put_filters_watermarks_quotas_access_pattern_out;
+ err = damon_sysfs_scheme_set_tried_regions(scheme);
+ if (err)
+ goto put_tried_regions_out;
+ return 0;
+
+put_tried_regions_out:
+ kobject_put(&scheme->tried_regions->kobj);
+ scheme->tried_regions = NULL;
+put_filters_watermarks_quotas_access_pattern_out:
+ kobject_put(&scheme->ops_filters->kobj);
+ scheme->ops_filters = NULL;
+ kobject_put(&scheme->core_filters->kobj);
+ scheme->core_filters = NULL;
+ kobject_put(&scheme->filters->kobj);
+ scheme->filters = NULL;
+put_watermarks_quotas_access_pattern_out:
+ kobject_put(&scheme->watermarks->kobj);
+ scheme->watermarks = NULL;
+put_quotas_access_pattern_out:
+ kobject_put(&scheme->quotas->kobj);
+ scheme->quotas = NULL;
+put_dests_out:
+ kobject_put(&scheme->dests->kobj);
+ scheme->dests = NULL;
+put_access_pattern_out:
+ kobject_put(&scheme->access_pattern->kobj);
+ scheme->access_pattern = NULL;
+ return err;
+}
+
+static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme)
+{
+ damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern);
+ kobject_put(&scheme->access_pattern->kobj);
+ damos_sysfs_dests_rm_dirs(scheme->dests);
+ kobject_put(&scheme->dests->kobj);
+ damon_sysfs_quotas_rm_dirs(scheme->quotas);
+ kobject_put(&scheme->quotas->kobj);
+ kobject_put(&scheme->watermarks->kobj);
+ damon_sysfs_scheme_filters_rm_dirs(scheme->filters);
+ kobject_put(&scheme->filters->kobj);
+ damon_sysfs_scheme_filters_rm_dirs(scheme->core_filters);
+ kobject_put(&scheme->core_filters->kobj);
+ damon_sysfs_scheme_filters_rm_dirs(scheme->ops_filters);
+ kobject_put(&scheme->ops_filters->kobj);
+ kobject_put(&scheme->stats->kobj);
+ damon_sysfs_scheme_regions_rm_dirs(scheme->tried_regions);
+ kobject_put(&scheme->tried_regions->kobj);
+}
+
+static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damos_sysfs_action_names); i++) {
+ struct damos_sysfs_action_name *action_name;
+
+ action_name = &damos_sysfs_action_names[i];
+ if (action_name->action == scheme->action)
+ return sysfs_emit(buf, "%s\n", action_name->name);
+ }
+ return -EINVAL;
+}
+
+static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damos_sysfs_action_names); i++) {
+ struct damos_sysfs_action_name *action_name;
+
+ action_name = &damos_sysfs_action_names[i];
+ if (sysfs_streq(buf, action_name->name)) {
+ scheme->action = action_name->action;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+
+static ssize_t apply_interval_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+
+ return sysfs_emit(buf, "%lu\n", scheme->apply_interval_us);
+}
+
+static ssize_t apply_interval_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+ int err = kstrtoul(buf, 0, &scheme->apply_interval_us);
+
+ return err ? err : count;
+}
+
+static ssize_t target_nid_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+
+ return sysfs_emit(buf, "%d\n", scheme->target_nid);
+}
+
+static ssize_t target_nid_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+ int err = 0;
+
+ /* TODO: error handling for target_nid range. */
+ err = kstrtoint(buf, 0, &scheme->target_nid);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_scheme_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_scheme, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_scheme_action_attr =
+ __ATTR_RW_MODE(action, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_apply_interval_us_attr =
+ __ATTR_RW_MODE(apply_interval_us, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_target_nid_attr =
+ __ATTR_RW_MODE(target_nid, 0600);
+
+static struct attribute *damon_sysfs_scheme_attrs[] = {
+ &damon_sysfs_scheme_action_attr.attr,
+ &damon_sysfs_scheme_apply_interval_us_attr.attr,
+ &damon_sysfs_scheme_target_nid_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme);
+
+static const struct kobj_type damon_sysfs_scheme_ktype = {
+ .release = damon_sysfs_scheme_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_groups,
+};
+
+/*
+ * schemes directory
+ */
+
+struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL);
+}
+
+void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes)
+{
+ struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr;
+ int i;
+
+ for (i = 0; i < schemes->nr; i++) {
+ damon_sysfs_scheme_rm_dirs(schemes_arr[i]);
+ kobject_put(&schemes_arr[i]->kobj);
+ }
+ schemes->nr = 0;
+ kfree(schemes_arr);
+ schemes->schemes_arr = NULL;
+}
+
+static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes,
+ int nr_schemes)
+{
+ struct damon_sysfs_scheme **schemes_arr, *scheme;
+ int err, i;
+
+ damon_sysfs_schemes_rm_dirs(schemes);
+ if (!nr_schemes)
+ return 0;
+
+ schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!schemes_arr)
+ return -ENOMEM;
+ schemes->schemes_arr = schemes_arr;
+
+ for (i = 0; i < nr_schemes; i++) {
+ /*
+ * apply_interval_us as 0 means same to aggregation interval
+ * (same to before-apply_interval behavior)
+ */
+ scheme = damon_sysfs_scheme_alloc(DAMOS_STAT, 0);
+ if (!scheme) {
+ damon_sysfs_schemes_rm_dirs(schemes);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&scheme->kobj,
+ &damon_sysfs_scheme_ktype, &schemes->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+ err = damon_sysfs_scheme_add_dirs(scheme);
+ if (err)
+ goto out;
+
+ schemes_arr[i] = scheme;
+ schemes->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_schemes_rm_dirs(schemes);
+ kobject_put(&scheme->kobj);
+ return err;
+}
+
+static ssize_t nr_schemes_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_schemes *schemes = container_of(kobj,
+ struct damon_sysfs_schemes, kobj);
+
+ return sysfs_emit(buf, "%d\n", schemes->nr);
+}
+
+static ssize_t nr_schemes_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_schemes *schemes;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ schemes = container_of(kobj, struct damon_sysfs_schemes, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_schemes_add_dirs(schemes, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+ return count;
+}
+
+static void damon_sysfs_schemes_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_schemes, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_schemes_nr_attr =
+ __ATTR_RW_MODE(nr_schemes, 0600);
+
+static struct attribute *damon_sysfs_schemes_attrs[] = {
+ &damon_sysfs_schemes_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_schemes);
+
+const struct kobj_type damon_sysfs_schemes_ktype = {
+ .release = damon_sysfs_schemes_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_schemes_groups,
+};
+
+static bool damon_sysfs_memcg_path_eq(struct mem_cgroup *memcg,
+ char *memcg_path_buf, char *path)
+{
+#ifdef CONFIG_MEMCG
+ cgroup_path(memcg->css.cgroup, memcg_path_buf, PATH_MAX);
+ if (sysfs_streq(memcg_path_buf, path))
+ return true;
+#endif /* CONFIG_MEMCG */
+ return false;
+}
+
+static int damon_sysfs_memcg_path_to_id(char *memcg_path, unsigned short *id)
+{
+ struct mem_cgroup *memcg;
+ char *path;
+ bool found = false;
+
+ if (!memcg_path)
+ return -EINVAL;
+
+ path = kmalloc_array(PATH_MAX, sizeof(*path), GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+
+ for (memcg = mem_cgroup_iter(NULL, NULL, NULL); memcg;
+ memcg = mem_cgroup_iter(NULL, memcg, NULL)) {
+ /* skip removed memcg */
+ if (!mem_cgroup_id(memcg))
+ continue;
+ if (damon_sysfs_memcg_path_eq(memcg, path, memcg_path)) {
+ *id = mem_cgroup_id(memcg);
+ found = true;
+ break;
+ }
+ }
+
+ kfree(path);
+ return found ? 0 : -EINVAL;
+}
+
+static int damon_sysfs_add_scheme_filters(struct damos *scheme,
+ struct damon_sysfs_scheme_filters *sysfs_filters)
+{
+ int i;
+
+ for (i = 0; i < sysfs_filters->nr; i++) {
+ struct damon_sysfs_scheme_filter *sysfs_filter =
+ sysfs_filters->filters_arr[i];
+ struct damos_filter *filter =
+ damos_new_filter(sysfs_filter->type,
+ sysfs_filter->matching,
+ sysfs_filter->allow);
+ int err;
+
+ if (!filter)
+ return -ENOMEM;
+ if (filter->type == DAMOS_FILTER_TYPE_MEMCG) {
+ err = damon_sysfs_memcg_path_to_id(
+ sysfs_filter->memcg_path,
+ &filter->memcg_id);
+ if (err) {
+ damos_destroy_filter(filter);
+ return err;
+ }
+ } else if (filter->type == DAMOS_FILTER_TYPE_ADDR) {
+ if (sysfs_filter->addr_range.end <
+ sysfs_filter->addr_range.start) {
+ damos_destroy_filter(filter);
+ return -EINVAL;
+ }
+ filter->addr_range = sysfs_filter->addr_range;
+ } else if (filter->type == DAMOS_FILTER_TYPE_TARGET) {
+ filter->target_idx = sysfs_filter->target_idx;
+ } else if (filter->type == DAMOS_FILTER_TYPE_HUGEPAGE_SIZE) {
+ if (sysfs_filter->sz_range.min >
+ sysfs_filter->sz_range.max) {
+ damos_destroy_filter(filter);
+ return -EINVAL;
+ }
+ filter->sz_range = sysfs_filter->sz_range;
+ }
+
+ damos_add_filter(scheme, filter);
+ }
+ return 0;
+}
+
+static int damos_sysfs_add_quota_score(
+ struct damos_sysfs_quota_goals *sysfs_goals,
+ struct damos_quota *quota)
+{
+ struct damos_quota_goal *goal;
+ int i, err;
+
+ for (i = 0; i < sysfs_goals->nr; i++) {
+ struct damos_sysfs_quota_goal *sysfs_goal =
+ sysfs_goals->goals_arr[i];
+
+ if (!sysfs_goal->target_value)
+ continue;
+
+ goal = damos_new_quota_goal(sysfs_goal->metric,
+ sysfs_goal->target_value);
+ if (!goal)
+ return -ENOMEM;
+ switch (sysfs_goal->metric) {
+ case DAMOS_QUOTA_USER_INPUT:
+ goal->current_value = sysfs_goal->current_value;
+ break;
+ case DAMOS_QUOTA_NODE_MEM_USED_BP:
+ case DAMOS_QUOTA_NODE_MEM_FREE_BP:
+ goal->nid = sysfs_goal->nid;
+ break;
+ case DAMOS_QUOTA_NODE_MEMCG_USED_BP:
+ case DAMOS_QUOTA_NODE_MEMCG_FREE_BP:
+ err = damon_sysfs_memcg_path_to_id(
+ sysfs_goal->path, &goal->memcg_id);
+ if (err) {
+ damos_destroy_quota_goal(goal);
+ return err;
+ }
+ goal->nid = sysfs_goal->nid;
+ break;
+ default:
+ break;
+ }
+ damos_add_quota_goal(quota, goal);
+ }
+ return 0;
+}
+
+int damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx)
+{
+ struct damos *scheme;
+ struct damos_quota quota = {};
+ int i = 0;
+
+ INIT_LIST_HEAD(&quota.goals);
+ damon_for_each_scheme(scheme, ctx) {
+ struct damon_sysfs_scheme *sysfs_scheme;
+ struct damos_quota_goal *g, *g_next;
+ int err;
+
+ /* user could have removed the scheme sysfs dir */
+ if (i >= sysfs_schemes->nr)
+ break;
+
+ sysfs_scheme = sysfs_schemes->schemes_arr[i];
+ err = damos_sysfs_add_quota_score(sysfs_scheme->quotas->goals,
+ &quota);
+ if (err) {
+ damos_for_each_quota_goal_safe(g, g_next, &quota)
+ damos_destroy_quota_goal(g);
+ return err;
+ }
+ err = damos_commit_quota_goals(&scheme->quota, &quota);
+ damos_for_each_quota_goal_safe(g, g_next, &quota)
+ damos_destroy_quota_goal(g);
+ if (err)
+ return err;
+ i++;
+ }
+ return 0;
+}
+
+void damos_sysfs_update_effective_quotas(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx)
+{
+ struct damos *scheme;
+ int schemes_idx = 0;
+
+ damon_for_each_scheme(scheme, ctx) {
+ struct damon_sysfs_quotas *sysfs_quotas;
+
+ /* user could have removed the scheme sysfs dir */
+ if (schemes_idx >= sysfs_schemes->nr)
+ break;
+
+ sysfs_quotas =
+ sysfs_schemes->schemes_arr[schemes_idx++]->quotas;
+ sysfs_quotas->effective_sz = scheme->quota.esz;
+ }
+}
+
+static int damos_sysfs_add_migrate_dest(struct damos *scheme,
+ struct damos_sysfs_dests *sysfs_dests)
+{
+ struct damos_migrate_dests *dests = &scheme->migrate_dests;
+ int i;
+
+ dests->node_id_arr = kmalloc_array(sysfs_dests->nr,
+ sizeof(*dests->node_id_arr), GFP_KERNEL);
+ if (!dests->node_id_arr)
+ return -ENOMEM;
+ dests->weight_arr = kmalloc_array(sysfs_dests->nr,
+ sizeof(*dests->weight_arr), GFP_KERNEL);
+ if (!dests->weight_arr)
+ /* ->node_id_arr will be freed by scheme destruction */
+ return -ENOMEM;
+ for (i = 0; i < sysfs_dests->nr; i++) {
+ dests->node_id_arr[i] = sysfs_dests->dests_arr[i]->id;
+ dests->weight_arr[i] = sysfs_dests->dests_arr[i]->weight;
+ }
+ dests->nr_dests = sysfs_dests->nr;
+ return 0;
+}
+
+static struct damos *damon_sysfs_mk_scheme(
+ struct damon_sysfs_scheme *sysfs_scheme)
+{
+ struct damon_sysfs_access_pattern *access_pattern =
+ sysfs_scheme->access_pattern;
+ struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+ struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+ struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
+ struct damos *scheme;
+ int err;
+
+ struct damos_access_pattern pattern = {
+ .min_sz_region = access_pattern->sz->min,
+ .max_sz_region = access_pattern->sz->max,
+ .min_nr_accesses = access_pattern->nr_accesses->min,
+ .max_nr_accesses = access_pattern->nr_accesses->max,
+ .min_age_region = access_pattern->age->min,
+ .max_age_region = access_pattern->age->max,
+ };
+ struct damos_quota quota = {
+ .ms = sysfs_quotas->ms,
+ .sz = sysfs_quotas->sz,
+ .reset_interval = sysfs_quotas->reset_interval_ms,
+ .weight_sz = sysfs_weights->sz,
+ .weight_nr_accesses = sysfs_weights->nr_accesses,
+ .weight_age = sysfs_weights->age,
+ };
+ struct damos_watermarks wmarks = {
+ .metric = sysfs_wmarks->metric,
+ .interval = sysfs_wmarks->interval_us,
+ .high = sysfs_wmarks->high,
+ .mid = sysfs_wmarks->mid,
+ .low = sysfs_wmarks->low,
+ };
+
+ scheme = damon_new_scheme(&pattern, sysfs_scheme->action,
+ sysfs_scheme->apply_interval_us, &quota, &wmarks,
+ sysfs_scheme->target_nid);
+ if (!scheme)
+ return NULL;
+
+ err = damos_sysfs_add_quota_score(sysfs_quotas->goals, &scheme->quota);
+ if (err) {
+ damon_destroy_scheme(scheme);
+ return NULL;
+ }
+
+ err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->core_filters);
+ if (err) {
+ damon_destroy_scheme(scheme);
+ return NULL;
+ }
+ err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->ops_filters);
+ if (err) {
+ damon_destroy_scheme(scheme);
+ return NULL;
+ }
+ err = damon_sysfs_add_scheme_filters(scheme, sysfs_scheme->filters);
+ if (err) {
+ damon_destroy_scheme(scheme);
+ return NULL;
+ }
+ err = damos_sysfs_add_migrate_dest(scheme, sysfs_scheme->dests);
+ if (err) {
+ damon_destroy_scheme(scheme);
+ return NULL;
+ }
+ return scheme;
+}
+
+int damon_sysfs_add_schemes(struct damon_ctx *ctx,
+ struct damon_sysfs_schemes *sysfs_schemes)
+{
+ int i;
+
+ for (i = 0; i < sysfs_schemes->nr; i++) {
+ struct damos *scheme, *next;
+
+ scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
+ if (!scheme) {
+ damon_for_each_scheme_safe(scheme, next, ctx)
+ damon_destroy_scheme(scheme);
+ return -ENOMEM;
+ }
+ damon_add_scheme(ctx, scheme);
+ }
+ return 0;
+}
+
+void damon_sysfs_schemes_update_stats(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx)
+{
+ struct damos *scheme;
+ int schemes_idx = 0;
+
+ damon_for_each_scheme(scheme, ctx) {
+ struct damon_sysfs_stats *sysfs_stats;
+
+ /* user could have removed the scheme sysfs dir */
+ if (schemes_idx >= sysfs_schemes->nr)
+ break;
+
+ sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats;
+ sysfs_stats->nr_tried = scheme->stat.nr_tried;
+ sysfs_stats->sz_tried = scheme->stat.sz_tried;
+ sysfs_stats->nr_applied = scheme->stat.nr_applied;
+ sysfs_stats->sz_applied = scheme->stat.sz_applied;
+ sysfs_stats->sz_ops_filter_passed =
+ scheme->stat.sz_ops_filter_passed;
+ sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds;
+ }
+}
+
+/**
+ * damos_sysfs_populate_region_dir() - Populate a schemes tried region dir.
+ * @sysfs_schemes: Schemes directory to populate regions directory.
+ * @ctx: Corresponding DAMON context.
+ * @t: DAMON target of @r.
+ * @r: DAMON region to populate the directory for.
+ * @s: Corresponding scheme.
+ * @total_bytes_only: Whether the request is for bytes update only.
+ * @sz_filter_passed: Bytes of @r that passed filters of @s.
+ *
+ * Called from DAMOS walk callback while holding damon_sysfs_lock.
+ */
+void damos_sysfs_populate_region_dir(struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx, struct damon_target *t,
+ struct damon_region *r, struct damos *s, bool total_bytes_only,
+ unsigned long sz_filter_passed)
+{
+ struct damos *scheme;
+ struct damon_sysfs_scheme_regions *sysfs_regions;
+ struct damon_sysfs_scheme_region *region;
+ int schemes_idx = 0;
+
+ damon_for_each_scheme(scheme, ctx) {
+ if (scheme == s)
+ break;
+ schemes_idx++;
+ }
+
+ /* user could have removed the scheme sysfs dir */
+ if (schemes_idx >= sysfs_schemes->nr)
+ return;
+
+ sysfs_regions = sysfs_schemes->schemes_arr[schemes_idx]->tried_regions;
+ sysfs_regions->total_bytes += r->ar.end - r->ar.start;
+ if (total_bytes_only)
+ return;
+
+ region = damon_sysfs_scheme_region_alloc(r);
+ if (!region)
+ return;
+ region->sz_filter_passed = sz_filter_passed;
+ list_add_tail(&region->list, &sysfs_regions->regions_list);
+ sysfs_regions->nr_regions++;
+ if (kobject_init_and_add(&region->kobj,
+ &damon_sysfs_scheme_region_ktype,
+ &sysfs_regions->kobj, "%d",
+ sysfs_regions->nr_regions++)) {
+ kobject_put(&region->kobj);
+ }
+}
+
+int damon_sysfs_schemes_clear_regions(
+ struct damon_sysfs_schemes *sysfs_schemes)
+{
+ int i;
+
+ for (i = 0; i < sysfs_schemes->nr; i++) {
+ struct damon_sysfs_scheme *sysfs_scheme;
+
+ sysfs_scheme = sysfs_schemes->schemes_arr[i];
+ damon_sysfs_scheme_regions_rm_dirs(
+ sysfs_scheme->tried_regions);
+ sysfs_scheme->tried_regions->total_bytes = 0;
+ }
+ return 0;
+}
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
new file mode 100644
index 000000000000..e2bd2d7becdd
--- /dev/null
+++ b/mm/damon/sysfs.c
@@ -0,0 +1,2108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON sysfs Interface
+ *
+ * Copyright (c) 2022 SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/pid.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "sysfs-common.h"
+
+/*
+ * init region directory
+ */
+
+struct damon_sysfs_region {
+ struct kobject kobj;
+ struct damon_addr_range ar;
+};
+
+static struct damon_sysfs_region *damon_sysfs_region_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_region), GFP_KERNEL);
+}
+
+static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->ar.start);
+}
+
+static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+ int err = kstrtoul(buf, 0, &region->ar.start);
+
+ return err ? err : count;
+}
+
+static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->ar.end);
+}
+
+static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+ int err = kstrtoul(buf, 0, &region->ar.end);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_region_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_region, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_region_start_attr =
+ __ATTR_RW_MODE(start, 0600);
+
+static struct kobj_attribute damon_sysfs_region_end_attr =
+ __ATTR_RW_MODE(end, 0600);
+
+static struct attribute *damon_sysfs_region_attrs[] = {
+ &damon_sysfs_region_start_attr.attr,
+ &damon_sysfs_region_end_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_region);
+
+static const struct kobj_type damon_sysfs_region_ktype = {
+ .release = damon_sysfs_region_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_region_groups,
+};
+
+/*
+ * init_regions directory
+ */
+
+struct damon_sysfs_regions {
+ struct kobject kobj;
+ struct damon_sysfs_region **regions_arr;
+ int nr;
+};
+
+static struct damon_sysfs_regions *damon_sysfs_regions_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_regions), GFP_KERNEL);
+}
+
+static void damon_sysfs_regions_rm_dirs(struct damon_sysfs_regions *regions)
+{
+ struct damon_sysfs_region **regions_arr = regions->regions_arr;
+ int i;
+
+ for (i = 0; i < regions->nr; i++)
+ kobject_put(&regions_arr[i]->kobj);
+ regions->nr = 0;
+ kfree(regions_arr);
+ regions->regions_arr = NULL;
+}
+
+static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions,
+ int nr_regions)
+{
+ struct damon_sysfs_region **regions_arr, *region;
+ int err, i;
+
+ damon_sysfs_regions_rm_dirs(regions);
+ if (!nr_regions)
+ return 0;
+
+ regions_arr = kmalloc_array(nr_regions, sizeof(*regions_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!regions_arr)
+ return -ENOMEM;
+ regions->regions_arr = regions_arr;
+
+ for (i = 0; i < nr_regions; i++) {
+ region = damon_sysfs_region_alloc();
+ if (!region) {
+ damon_sysfs_regions_rm_dirs(regions);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&region->kobj,
+ &damon_sysfs_region_ktype, &regions->kobj,
+ "%d", i);
+ if (err) {
+ kobject_put(&region->kobj);
+ damon_sysfs_regions_rm_dirs(regions);
+ return err;
+ }
+
+ regions_arr[i] = region;
+ regions->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_regions_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_regions *regions = container_of(kobj,
+ struct damon_sysfs_regions, kobj);
+
+ return sysfs_emit(buf, "%d\n", regions->nr);
+}
+
+static ssize_t nr_regions_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_regions *regions;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ regions = container_of(kobj, struct damon_sysfs_regions, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_regions_add_dirs(regions, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_regions_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_regions, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_regions_nr_attr =
+ __ATTR_RW_MODE(nr_regions, 0600);
+
+static struct attribute *damon_sysfs_regions_attrs[] = {
+ &damon_sysfs_regions_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_regions);
+
+static const struct kobj_type damon_sysfs_regions_ktype = {
+ .release = damon_sysfs_regions_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_regions_groups,
+};
+
+/*
+ * target directory
+ */
+
+struct damon_sysfs_target {
+ struct kobject kobj;
+ struct damon_sysfs_regions *regions;
+ int pid;
+ bool obsolete;
+};
+
+static struct damon_sysfs_target *damon_sysfs_target_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_target), GFP_KERNEL);
+}
+
+static int damon_sysfs_target_add_dirs(struct damon_sysfs_target *target)
+{
+ struct damon_sysfs_regions *regions = damon_sysfs_regions_alloc();
+ int err;
+
+ if (!regions)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&regions->kobj, &damon_sysfs_regions_ktype,
+ &target->kobj, "regions");
+ if (err)
+ kobject_put(&regions->kobj);
+ else
+ target->regions = regions;
+ return err;
+}
+
+static void damon_sysfs_target_rm_dirs(struct damon_sysfs_target *target)
+{
+ damon_sysfs_regions_rm_dirs(target->regions);
+ kobject_put(&target->regions->kobj);
+}
+
+static ssize_t pid_target_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_target *target = container_of(kobj,
+ struct damon_sysfs_target, kobj);
+
+ return sysfs_emit(buf, "%d\n", target->pid);
+}
+
+static ssize_t pid_target_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_target *target = container_of(kobj,
+ struct damon_sysfs_target, kobj);
+ int err = kstrtoint(buf, 0, &target->pid);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t obsolete_target_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_target *target = container_of(kobj,
+ struct damon_sysfs_target, kobj);
+
+ return sysfs_emit(buf, "%c\n", target->obsolete ? 'Y' : 'N');
+}
+
+static ssize_t obsolete_target_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_target *target = container_of(kobj,
+ struct damon_sysfs_target, kobj);
+ bool obsolete;
+ int err = kstrtobool(buf, &obsolete);
+
+ if (err)
+ return err;
+ target->obsolete = obsolete;
+ return count;
+}
+
+static void damon_sysfs_target_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_target, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_target_pid_attr =
+ __ATTR_RW_MODE(pid_target, 0600);
+
+static struct kobj_attribute damon_sysfs_target_obsolete_attr =
+ __ATTR_RW_MODE(obsolete_target, 0600);
+
+static struct attribute *damon_sysfs_target_attrs[] = {
+ &damon_sysfs_target_pid_attr.attr,
+ &damon_sysfs_target_obsolete_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_target);
+
+static const struct kobj_type damon_sysfs_target_ktype = {
+ .release = damon_sysfs_target_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_target_groups,
+};
+
+/*
+ * targets directory
+ */
+
+struct damon_sysfs_targets {
+ struct kobject kobj;
+ struct damon_sysfs_target **targets_arr;
+ int nr;
+};
+
+static struct damon_sysfs_targets *damon_sysfs_targets_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_targets), GFP_KERNEL);
+}
+
+static void damon_sysfs_targets_rm_dirs(struct damon_sysfs_targets *targets)
+{
+ struct damon_sysfs_target **targets_arr = targets->targets_arr;
+ int i;
+
+ for (i = 0; i < targets->nr; i++) {
+ damon_sysfs_target_rm_dirs(targets_arr[i]);
+ kobject_put(&targets_arr[i]->kobj);
+ }
+ targets->nr = 0;
+ kfree(targets_arr);
+ targets->targets_arr = NULL;
+}
+
+static int damon_sysfs_targets_add_dirs(struct damon_sysfs_targets *targets,
+ int nr_targets)
+{
+ struct damon_sysfs_target **targets_arr, *target;
+ int err, i;
+
+ damon_sysfs_targets_rm_dirs(targets);
+ if (!nr_targets)
+ return 0;
+
+ targets_arr = kmalloc_array(nr_targets, sizeof(*targets_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!targets_arr)
+ return -ENOMEM;
+ targets->targets_arr = targets_arr;
+
+ for (i = 0; i < nr_targets; i++) {
+ target = damon_sysfs_target_alloc();
+ if (!target) {
+ damon_sysfs_targets_rm_dirs(targets);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&target->kobj,
+ &damon_sysfs_target_ktype, &targets->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_target_add_dirs(target);
+ if (err)
+ goto out;
+
+ targets_arr[i] = target;
+ targets->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_targets_rm_dirs(targets);
+ kobject_put(&target->kobj);
+ return err;
+}
+
+static ssize_t nr_targets_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_targets *targets = container_of(kobj,
+ struct damon_sysfs_targets, kobj);
+
+ return sysfs_emit(buf, "%d\n", targets->nr);
+}
+
+static ssize_t nr_targets_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_targets *targets;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ targets = container_of(kobj, struct damon_sysfs_targets, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_targets_add_dirs(targets, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_targets_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_targets, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_targets_nr_attr =
+ __ATTR_RW_MODE(nr_targets, 0600);
+
+static struct attribute *damon_sysfs_targets_attrs[] = {
+ &damon_sysfs_targets_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_targets);
+
+static const struct kobj_type damon_sysfs_targets_ktype = {
+ .release = damon_sysfs_targets_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_targets_groups,
+};
+
+/*
+ * intervals goal directory
+ */
+
+struct damon_sysfs_intervals_goal {
+ struct kobject kobj;
+ unsigned long access_bp;
+ unsigned long aggrs;
+ unsigned long min_sample_us;
+ unsigned long max_sample_us;
+};
+
+static struct damon_sysfs_intervals_goal *damon_sysfs_intervals_goal_alloc(
+ unsigned long access_bp, unsigned long aggrs,
+ unsigned long min_sample_us, unsigned long max_sample_us)
+{
+ struct damon_sysfs_intervals_goal *goal = kmalloc(sizeof(*goal),
+ GFP_KERNEL);
+
+ if (!goal)
+ return NULL;
+
+ goal->kobj = (struct kobject){};
+ goal->access_bp = access_bp;
+ goal->aggrs = aggrs;
+ goal->min_sample_us = min_sample_us;
+ goal->max_sample_us = max_sample_us;
+ return goal;
+}
+
+static ssize_t access_bp_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals_goal *goal = container_of(kobj,
+ struct damon_sysfs_intervals_goal, kobj);
+
+ return sysfs_emit(buf, "%lu\n", goal->access_bp);
+}
+
+static ssize_t access_bp_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals_goal *goal = container_of(kobj,
+ struct damon_sysfs_intervals_goal, kobj);
+ unsigned long nr;
+ int err = kstrtoul(buf, 0, &nr);
+
+ if (err)
+ return err;
+
+ goal->access_bp = nr;
+ return count;
+}
+
+static ssize_t aggrs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals_goal *goal = container_of(kobj,
+ struct damon_sysfs_intervals_goal, kobj);
+
+ return sysfs_emit(buf, "%lu\n", goal->aggrs);
+}
+
+static ssize_t aggrs_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals_goal *goal = container_of(kobj,
+ struct damon_sysfs_intervals_goal, kobj);
+ unsigned long nr;
+ int err = kstrtoul(buf, 0, &nr);
+
+ if (err)
+ return err;
+
+ goal->aggrs = nr;
+ return count;
+}
+
+static ssize_t min_sample_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals_goal *goal = container_of(kobj,
+ struct damon_sysfs_intervals_goal, kobj);
+
+ return sysfs_emit(buf, "%lu\n", goal->min_sample_us);
+}
+
+static ssize_t min_sample_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals_goal *goal = container_of(kobj,
+ struct damon_sysfs_intervals_goal, kobj);
+ unsigned long nr;
+ int err = kstrtoul(buf, 0, &nr);
+
+ if (err)
+ return err;
+
+ goal->min_sample_us = nr;
+ return count;
+}
+
+static ssize_t max_sample_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals_goal *goal = container_of(kobj,
+ struct damon_sysfs_intervals_goal, kobj);
+
+ return sysfs_emit(buf, "%lu\n", goal->max_sample_us);
+}
+
+static ssize_t max_sample_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals_goal *goal = container_of(kobj,
+ struct damon_sysfs_intervals_goal, kobj);
+ unsigned long nr;
+ int err = kstrtoul(buf, 0, &nr);
+
+ if (err)
+ return err;
+
+ goal->max_sample_us = nr;
+ return count;
+}
+
+static void damon_sysfs_intervals_goal_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_intervals_goal, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_intervals_goal_access_bp_attr =
+ __ATTR_RW_MODE(access_bp, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_goal_aggrs_attr =
+ __ATTR_RW_MODE(aggrs, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_goal_min_sample_us_attr =
+ __ATTR_RW_MODE(min_sample_us, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_goal_max_sample_us_attr =
+ __ATTR_RW_MODE(max_sample_us, 0600);
+
+static struct attribute *damon_sysfs_intervals_goal_attrs[] = {
+ &damon_sysfs_intervals_goal_access_bp_attr.attr,
+ &damon_sysfs_intervals_goal_aggrs_attr.attr,
+ &damon_sysfs_intervals_goal_min_sample_us_attr.attr,
+ &damon_sysfs_intervals_goal_max_sample_us_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_intervals_goal);
+
+static const struct kobj_type damon_sysfs_intervals_goal_ktype = {
+ .release = damon_sysfs_intervals_goal_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_intervals_goal_groups,
+};
+
+/*
+ * intervals directory
+ */
+
+struct damon_sysfs_intervals {
+ struct kobject kobj;
+ unsigned long sample_us;
+ unsigned long aggr_us;
+ unsigned long update_us;
+ struct damon_sysfs_intervals_goal *intervals_goal;
+};
+
+static struct damon_sysfs_intervals *damon_sysfs_intervals_alloc(
+ unsigned long sample_us, unsigned long aggr_us,
+ unsigned long update_us)
+{
+ struct damon_sysfs_intervals *intervals = kmalloc(sizeof(*intervals),
+ GFP_KERNEL);
+
+ if (!intervals)
+ return NULL;
+
+ intervals->kobj = (struct kobject){};
+ intervals->sample_us = sample_us;
+ intervals->aggr_us = aggr_us;
+ intervals->update_us = update_us;
+ return intervals;
+}
+
+static int damon_sysfs_intervals_add_dirs(struct damon_sysfs_intervals *intervals)
+{
+ struct damon_sysfs_intervals_goal *goal;
+ int err;
+
+ goal = damon_sysfs_intervals_goal_alloc(0, 0, 0, 0);
+ if (!goal)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&goal->kobj,
+ &damon_sysfs_intervals_goal_ktype, &intervals->kobj,
+ "intervals_goal");
+ if (err) {
+ kobject_put(&goal->kobj);
+ intervals->intervals_goal = NULL;
+ return err;
+ }
+ intervals->intervals_goal = goal;
+ return 0;
+}
+
+static void damon_sysfs_intervals_rm_dirs(struct damon_sysfs_intervals *intervals)
+{
+ kobject_put(&intervals->intervals_goal->kobj);
+}
+
+static ssize_t sample_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->sample_us);
+}
+
+static ssize_t sample_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return err;
+
+ intervals->sample_us = us;
+ return count;
+}
+
+static ssize_t aggr_us_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->aggr_us);
+}
+
+static ssize_t aggr_us_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return err;
+
+ intervals->aggr_us = us;
+ return count;
+}
+
+static ssize_t update_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->update_us);
+}
+
+static ssize_t update_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return err;
+
+ intervals->update_us = us;
+ return count;
+}
+
+static void damon_sysfs_intervals_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_intervals, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_intervals_sample_us_attr =
+ __ATTR_RW_MODE(sample_us, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_aggr_us_attr =
+ __ATTR_RW_MODE(aggr_us, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_update_us_attr =
+ __ATTR_RW_MODE(update_us, 0600);
+
+static struct attribute *damon_sysfs_intervals_attrs[] = {
+ &damon_sysfs_intervals_sample_us_attr.attr,
+ &damon_sysfs_intervals_aggr_us_attr.attr,
+ &damon_sysfs_intervals_update_us_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_intervals);
+
+static const struct kobj_type damon_sysfs_intervals_ktype = {
+ .release = damon_sysfs_intervals_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_intervals_groups,
+};
+
+/*
+ * monitoring_attrs directory
+ */
+
+struct damon_sysfs_attrs {
+ struct kobject kobj;
+ struct damon_sysfs_intervals *intervals;
+ struct damon_sysfs_ul_range *nr_regions_range;
+};
+
+static struct damon_sysfs_attrs *damon_sysfs_attrs_alloc(void)
+{
+ struct damon_sysfs_attrs *attrs = kmalloc(sizeof(*attrs), GFP_KERNEL);
+
+ if (!attrs)
+ return NULL;
+ attrs->kobj = (struct kobject){};
+ return attrs;
+}
+
+static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs)
+{
+ struct damon_sysfs_intervals *intervals;
+ struct damon_sysfs_ul_range *nr_regions_range;
+ int err;
+
+ intervals = damon_sysfs_intervals_alloc(5000, 100000, 60000000);
+ if (!intervals)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&intervals->kobj,
+ &damon_sysfs_intervals_ktype, &attrs->kobj,
+ "intervals");
+ if (err)
+ goto put_intervals_out;
+ err = damon_sysfs_intervals_add_dirs(intervals);
+ if (err)
+ goto put_intervals_out;
+ attrs->intervals = intervals;
+
+ nr_regions_range = damon_sysfs_ul_range_alloc(10, 1000);
+ if (!nr_regions_range) {
+ err = -ENOMEM;
+ goto put_intervals_out;
+ }
+
+ err = kobject_init_and_add(&nr_regions_range->kobj,
+ &damon_sysfs_ul_range_ktype, &attrs->kobj,
+ "nr_regions");
+ if (err)
+ goto put_nr_regions_intervals_out;
+ attrs->nr_regions_range = nr_regions_range;
+ return 0;
+
+put_nr_regions_intervals_out:
+ kobject_put(&nr_regions_range->kobj);
+ attrs->nr_regions_range = NULL;
+put_intervals_out:
+ kobject_put(&intervals->kobj);
+ attrs->intervals = NULL;
+ return err;
+}
+
+static void damon_sysfs_attrs_rm_dirs(struct damon_sysfs_attrs *attrs)
+{
+ kobject_put(&attrs->nr_regions_range->kobj);
+ damon_sysfs_intervals_rm_dirs(attrs->intervals);
+ kobject_put(&attrs->intervals->kobj);
+}
+
+static void damon_sysfs_attrs_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_attrs, kobj));
+}
+
+static struct attribute *damon_sysfs_attrs_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_attrs);
+
+static const struct kobj_type damon_sysfs_attrs_ktype = {
+ .release = damon_sysfs_attrs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_attrs_groups,
+};
+
+/*
+ * context directory
+ */
+
+struct damon_sysfs_ops_name {
+ enum damon_ops_id ops_id;
+ char *name;
+};
+
+static const struct damon_sysfs_ops_name damon_sysfs_ops_names[] = {
+ {
+ .ops_id = DAMON_OPS_VADDR,
+ .name = "vaddr",
+ },
+ {
+ .ops_id = DAMON_OPS_FVADDR,
+ .name = "fvaddr",
+ },
+ {
+ .ops_id = DAMON_OPS_PADDR,
+ .name = "paddr",
+ },
+};
+
+struct damon_sysfs_context {
+ struct kobject kobj;
+ enum damon_ops_id ops_id;
+ unsigned long addr_unit;
+ struct damon_sysfs_attrs *attrs;
+ struct damon_sysfs_targets *targets;
+ struct damon_sysfs_schemes *schemes;
+};
+
+static struct damon_sysfs_context *damon_sysfs_context_alloc(
+ enum damon_ops_id ops_id)
+{
+ struct damon_sysfs_context *context = kmalloc(sizeof(*context),
+ GFP_KERNEL);
+
+ if (!context)
+ return NULL;
+ context->kobj = (struct kobject){};
+ context->ops_id = ops_id;
+ context->addr_unit = 1;
+ return context;
+}
+
+static int damon_sysfs_context_set_attrs(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_attrs *attrs = damon_sysfs_attrs_alloc();
+ int err;
+
+ if (!attrs)
+ return -ENOMEM;
+ err = kobject_init_and_add(&attrs->kobj, &damon_sysfs_attrs_ktype,
+ &context->kobj, "monitoring_attrs");
+ if (err)
+ goto out;
+ err = damon_sysfs_attrs_add_dirs(attrs);
+ if (err)
+ goto out;
+ context->attrs = attrs;
+ return 0;
+
+out:
+ kobject_put(&attrs->kobj);
+ return err;
+}
+
+static int damon_sysfs_context_set_targets(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_targets *targets = damon_sysfs_targets_alloc();
+ int err;
+
+ if (!targets)
+ return -ENOMEM;
+ err = kobject_init_and_add(&targets->kobj, &damon_sysfs_targets_ktype,
+ &context->kobj, "targets");
+ if (err) {
+ kobject_put(&targets->kobj);
+ return err;
+ }
+ context->targets = targets;
+ return 0;
+}
+
+static int damon_sysfs_context_set_schemes(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_schemes *schemes = damon_sysfs_schemes_alloc();
+ int err;
+
+ if (!schemes)
+ return -ENOMEM;
+ err = kobject_init_and_add(&schemes->kobj, &damon_sysfs_schemes_ktype,
+ &context->kobj, "schemes");
+ if (err) {
+ kobject_put(&schemes->kobj);
+ return err;
+ }
+ context->schemes = schemes;
+ return 0;
+}
+
+static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context)
+{
+ int err;
+
+ err = damon_sysfs_context_set_attrs(context);
+ if (err)
+ return err;
+
+ err = damon_sysfs_context_set_targets(context);
+ if (err)
+ goto put_attrs_out;
+
+ err = damon_sysfs_context_set_schemes(context);
+ if (err)
+ goto put_targets_attrs_out;
+ return 0;
+
+put_targets_attrs_out:
+ kobject_put(&context->targets->kobj);
+ context->targets = NULL;
+put_attrs_out:
+ kobject_put(&context->attrs->kobj);
+ context->attrs = NULL;
+ return err;
+}
+
+static void damon_sysfs_context_rm_dirs(struct damon_sysfs_context *context)
+{
+ damon_sysfs_attrs_rm_dirs(context->attrs);
+ kobject_put(&context->attrs->kobj);
+ damon_sysfs_targets_rm_dirs(context->targets);
+ kobject_put(&context->targets->kobj);
+ damon_sysfs_schemes_rm_dirs(context->schemes);
+ kobject_put(&context->schemes->kobj);
+}
+
+static ssize_t avail_operations_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ int len = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damon_sysfs_ops_names); i++) {
+ const struct damon_sysfs_ops_name *ops_name;
+
+ ops_name = &damon_sysfs_ops_names[i];
+ if (!damon_is_registered_ops(ops_name->ops_id))
+ continue;
+ len += sysfs_emit_at(buf, len, "%s\n", ops_name->name);
+ }
+ return len;
+}
+
+static ssize_t operations_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_context *context = container_of(kobj,
+ struct damon_sysfs_context, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damon_sysfs_ops_names); i++) {
+ const struct damon_sysfs_ops_name *ops_name;
+
+ ops_name = &damon_sysfs_ops_names[i];
+ if (ops_name->ops_id == context->ops_id)
+ return sysfs_emit(buf, "%s\n", ops_name->name);
+ }
+ return -EINVAL;
+}
+
+static ssize_t operations_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_context *context = container_of(kobj,
+ struct damon_sysfs_context, kobj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(damon_sysfs_ops_names); i++) {
+ const struct damon_sysfs_ops_name *ops_name;
+
+ ops_name = &damon_sysfs_ops_names[i];
+ if (sysfs_streq(buf, ops_name->name)) {
+ context->ops_id = ops_name->ops_id;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+
+static ssize_t addr_unit_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_context *context = container_of(kobj,
+ struct damon_sysfs_context, kobj);
+
+ return sysfs_emit(buf, "%lu\n", context->addr_unit);
+}
+
+static ssize_t addr_unit_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_context *context = container_of(kobj,
+ struct damon_sysfs_context, kobj);
+ unsigned long input_addr_unit;
+ int err = kstrtoul(buf, 0, &input_addr_unit);
+
+ if (err)
+ return err;
+ if (!input_addr_unit)
+ return -EINVAL;
+
+ context->addr_unit = input_addr_unit;
+ return count;
+}
+
+static void damon_sysfs_context_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_context, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_context_avail_operations_attr =
+ __ATTR_RO_MODE(avail_operations, 0400);
+
+static struct kobj_attribute damon_sysfs_context_operations_attr =
+ __ATTR_RW_MODE(operations, 0600);
+
+static struct kobj_attribute damon_sysfs_context_addr_unit_attr =
+ __ATTR_RW_MODE(addr_unit, 0600);
+
+static struct attribute *damon_sysfs_context_attrs[] = {
+ &damon_sysfs_context_avail_operations_attr.attr,
+ &damon_sysfs_context_operations_attr.attr,
+ &damon_sysfs_context_addr_unit_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_context);
+
+static const struct kobj_type damon_sysfs_context_ktype = {
+ .release = damon_sysfs_context_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_context_groups,
+};
+
+/*
+ * contexts directory
+ */
+
+struct damon_sysfs_contexts {
+ struct kobject kobj;
+ struct damon_sysfs_context **contexts_arr;
+ int nr;
+};
+
+static struct damon_sysfs_contexts *damon_sysfs_contexts_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_contexts), GFP_KERNEL);
+}
+
+static void damon_sysfs_contexts_rm_dirs(struct damon_sysfs_contexts *contexts)
+{
+ struct damon_sysfs_context **contexts_arr = contexts->contexts_arr;
+ int i;
+
+ for (i = 0; i < contexts->nr; i++) {
+ damon_sysfs_context_rm_dirs(contexts_arr[i]);
+ kobject_put(&contexts_arr[i]->kobj);
+ }
+ contexts->nr = 0;
+ kfree(contexts_arr);
+ contexts->contexts_arr = NULL;
+}
+
+static int damon_sysfs_contexts_add_dirs(struct damon_sysfs_contexts *contexts,
+ int nr_contexts)
+{
+ struct damon_sysfs_context **contexts_arr, *context;
+ int err, i;
+
+ damon_sysfs_contexts_rm_dirs(contexts);
+ if (!nr_contexts)
+ return 0;
+
+ contexts_arr = kmalloc_array(nr_contexts, sizeof(*contexts_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!contexts_arr)
+ return -ENOMEM;
+ contexts->contexts_arr = contexts_arr;
+
+ for (i = 0; i < nr_contexts; i++) {
+ context = damon_sysfs_context_alloc(DAMON_OPS_VADDR);
+ if (!context) {
+ damon_sysfs_contexts_rm_dirs(contexts);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&context->kobj,
+ &damon_sysfs_context_ktype, &contexts->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_context_add_dirs(context);
+ if (err)
+ goto out;
+
+ contexts_arr[i] = context;
+ contexts->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_contexts_rm_dirs(contexts);
+ kobject_put(&context->kobj);
+ return err;
+}
+
+static ssize_t nr_contexts_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_contexts *contexts = container_of(kobj,
+ struct damon_sysfs_contexts, kobj);
+
+ return sysfs_emit(buf, "%d\n", contexts->nr);
+}
+
+static ssize_t nr_contexts_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_contexts *contexts;
+ int nr, err;
+
+ err = kstrtoint(buf, 0, &nr);
+ if (err)
+ return err;
+ /* TODO: support multiple contexts per kdamond */
+ if (nr < 0 || 1 < nr)
+ return -EINVAL;
+
+ contexts = container_of(kobj, struct damon_sysfs_contexts, kobj);
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_contexts_add_dirs(contexts, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_contexts_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_contexts, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_contexts_nr_attr
+ = __ATTR_RW_MODE(nr_contexts, 0600);
+
+static struct attribute *damon_sysfs_contexts_attrs[] = {
+ &damon_sysfs_contexts_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_contexts);
+
+static const struct kobj_type damon_sysfs_contexts_ktype = {
+ .release = damon_sysfs_contexts_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_contexts_groups,
+};
+
+/*
+ * kdamond directory
+ */
+
+struct damon_sysfs_kdamond {
+ struct kobject kobj;
+ struct damon_sysfs_contexts *contexts;
+ struct damon_ctx *damon_ctx;
+ unsigned int refresh_ms;
+};
+
+static struct damon_sysfs_kdamond *damon_sysfs_kdamond_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_kdamond), GFP_KERNEL);
+}
+
+static int damon_sysfs_kdamond_add_dirs(struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_sysfs_contexts *contexts;
+ int err;
+
+ contexts = damon_sysfs_contexts_alloc();
+ if (!contexts)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&contexts->kobj,
+ &damon_sysfs_contexts_ktype, &kdamond->kobj,
+ "contexts");
+ if (err) {
+ kobject_put(&contexts->kobj);
+ return err;
+ }
+ kdamond->contexts = contexts;
+
+ return err;
+}
+
+static void damon_sysfs_kdamond_rm_dirs(struct damon_sysfs_kdamond *kdamond)
+{
+ damon_sysfs_contexts_rm_dirs(kdamond->contexts);
+ kobject_put(&kdamond->contexts->kobj);
+}
+
+/*
+ * enum damon_sysfs_cmd - Commands for a specific kdamond.
+ */
+enum damon_sysfs_cmd {
+ /* @DAMON_SYSFS_CMD_ON: Turn the kdamond on. */
+ DAMON_SYSFS_CMD_ON,
+ /* @DAMON_SYSFS_CMD_OFF: Turn the kdamond off. */
+ DAMON_SYSFS_CMD_OFF,
+ /* @DAMON_SYSFS_CMD_COMMIT: Update kdamond inputs. */
+ DAMON_SYSFS_CMD_COMMIT,
+ /*
+ * @DAMON_SYSFS_CMD_COMMIT_SCHEMES_QUOTA_GOALS: Commit the quota goals
+ * to DAMON.
+ */
+ DAMON_SYSFS_CMD_COMMIT_SCHEMES_QUOTA_GOALS,
+ /*
+ * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS: Update scheme stats sysfs
+ * files.
+ */
+ DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS,
+ /*
+ * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES: Update
+ * tried_regions/total_bytes sysfs files for each scheme.
+ */
+ DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES,
+ /*
+ * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: Update schemes tried
+ * regions
+ */
+ DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS,
+ /*
+ * @DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: Clear schemes tried
+ * regions
+ */
+ DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS,
+ /*
+ * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS: Update the
+ * effective size quota of the scheme in bytes.
+ */
+ DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS,
+ /*
+ * @DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS: Update the tuned monitoring
+ * intervals.
+ */
+ DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS,
+ /*
+ * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands.
+ */
+ NR_DAMON_SYSFS_CMDS,
+};
+
+/* Should match with enum damon_sysfs_cmd */
+static const char * const damon_sysfs_cmd_strs[] = {
+ "on",
+ "off",
+ "commit",
+ "commit_schemes_quota_goals",
+ "update_schemes_stats",
+ "update_schemes_tried_bytes",
+ "update_schemes_tried_regions",
+ "clear_schemes_tried_regions",
+ "update_schemes_effective_quotas",
+ "update_tuned_intervals",
+};
+
+static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ struct damon_ctx *ctx;
+ bool running = false;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+
+ ctx = kdamond->damon_ctx;
+ if (ctx)
+ running = damon_is_running(ctx);
+
+ mutex_unlock(&damon_sysfs_lock);
+
+ return sysfs_emit(buf, "%s\n", running ?
+ damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] :
+ damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_OFF]);
+}
+
+static int damon_sysfs_set_attrs(struct damon_ctx *ctx,
+ struct damon_sysfs_attrs *sys_attrs)
+{
+ struct damon_sysfs_intervals *sys_intervals = sys_attrs->intervals;
+ struct damon_sysfs_intervals_goal *sys_goal =
+ sys_intervals->intervals_goal;
+ struct damon_sysfs_ul_range *sys_nr_regions =
+ sys_attrs->nr_regions_range;
+ struct damon_attrs attrs = {
+ .sample_interval = sys_intervals->sample_us,
+ .aggr_interval = sys_intervals->aggr_us,
+ .intervals_goal = {
+ .access_bp = sys_goal->access_bp,
+ .aggrs = sys_goal->aggrs,
+ .min_sample_us = sys_goal->min_sample_us,
+ .max_sample_us = sys_goal->max_sample_us},
+ .ops_update_interval = sys_intervals->update_us,
+ .min_nr_regions = sys_nr_regions->min,
+ .max_nr_regions = sys_nr_regions->max,
+ };
+ return damon_set_attrs(ctx, &attrs);
+}
+
+static int damon_sysfs_set_regions(struct damon_target *t,
+ struct damon_sysfs_regions *sysfs_regions,
+ unsigned long min_sz_region)
+{
+ struct damon_addr_range *ranges = kmalloc_array(sysfs_regions->nr,
+ sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN);
+ int i, err = -EINVAL;
+
+ if (!ranges)
+ return -ENOMEM;
+ for (i = 0; i < sysfs_regions->nr; i++) {
+ struct damon_sysfs_region *sys_region =
+ sysfs_regions->regions_arr[i];
+
+ if (sys_region->ar.start > sys_region->ar.end)
+ goto out;
+
+ ranges[i].start = sys_region->ar.start;
+ ranges[i].end = sys_region->ar.end;
+ if (i == 0)
+ continue;
+ if (ranges[i - 1].end > ranges[i].start)
+ goto out;
+ }
+ err = damon_set_regions(t, ranges, sysfs_regions->nr, min_sz_region);
+out:
+ kfree(ranges);
+ return err;
+
+}
+
+static int damon_sysfs_add_target(struct damon_sysfs_target *sys_target,
+ struct damon_ctx *ctx)
+{
+ struct damon_target *t = damon_new_target();
+
+ if (!t)
+ return -ENOMEM;
+ damon_add_target(ctx, t);
+ if (damon_target_has_pid(ctx)) {
+ t->pid = find_get_pid(sys_target->pid);
+ if (!t->pid)
+ /* caller will destroy targets */
+ return -EINVAL;
+ }
+ t->obsolete = sys_target->obsolete;
+ return damon_sysfs_set_regions(t, sys_target->regions, ctx->min_sz_region);
+}
+
+static int damon_sysfs_add_targets(struct damon_ctx *ctx,
+ struct damon_sysfs_targets *sysfs_targets)
+{
+ int i, err;
+
+ /* Multiple physical address space monitoring targets makes no sense */
+ if (ctx->ops.id == DAMON_OPS_PADDR && sysfs_targets->nr > 1)
+ return -EINVAL;
+
+ for (i = 0; i < sysfs_targets->nr; i++) {
+ struct damon_sysfs_target *st = sysfs_targets->targets_arr[i];
+
+ err = damon_sysfs_add_target(st, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+/*
+ * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files.
+ * @data: The kobject wrapper that associated to the kdamond thread.
+ *
+ * This function reads the schemes stats of specific kdamond and update the
+ * related values for sysfs files. This function should be called from DAMON
+ * worker thread,to safely access the DAMON contexts-internal data. Caller
+ * should also ensure holding ``damon_syfs_lock``, and ->damon_ctx of @data is
+ * not NULL but a valid pointer, to safely access DAMON sysfs variables.
+ */
+static int damon_sysfs_upd_schemes_stats(void *data)
+{
+ struct damon_sysfs_kdamond *kdamond = data;
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+
+ damon_sysfs_schemes_update_stats(
+ kdamond->contexts->contexts_arr[0]->schemes, ctx);
+ return 0;
+}
+
+static inline bool damon_sysfs_kdamond_running(
+ struct damon_sysfs_kdamond *kdamond)
+{
+ return kdamond->damon_ctx &&
+ damon_is_running(kdamond->damon_ctx);
+}
+
+static int damon_sysfs_apply_inputs(struct damon_ctx *ctx,
+ struct damon_sysfs_context *sys_ctx)
+{
+ int err;
+
+ err = damon_select_ops(ctx, sys_ctx->ops_id);
+ if (err)
+ return err;
+ ctx->addr_unit = sys_ctx->addr_unit;
+ /* addr_unit is respected by only DAMON_OPS_PADDR */
+ if (sys_ctx->ops_id == DAMON_OPS_PADDR)
+ ctx->min_sz_region = max(
+ DAMON_MIN_REGION / sys_ctx->addr_unit, 1);
+ err = damon_sysfs_set_attrs(ctx, sys_ctx->attrs);
+ if (err)
+ return err;
+ err = damon_sysfs_add_targets(ctx, sys_ctx->targets);
+ if (err)
+ return err;
+ return damon_sysfs_add_schemes(ctx, sys_ctx->schemes);
+}
+
+static struct damon_ctx *damon_sysfs_build_ctx(
+ struct damon_sysfs_context *sys_ctx);
+
+/*
+ * Return a new damon_ctx for testing new parameters to commit.
+ */
+static struct damon_ctx *damon_sysfs_new_test_ctx(
+ struct damon_ctx *running_ctx)
+{
+ struct damon_ctx *test_ctx;
+ int err;
+
+ test_ctx = damon_new_ctx();
+ if (!test_ctx)
+ return NULL;
+ err = damon_commit_ctx(test_ctx, running_ctx);
+ if (err) {
+ damon_destroy_ctx(test_ctx);
+ return NULL;
+ }
+ return test_ctx;
+}
+
+/*
+ * damon_sysfs_commit_input() - Commit user inputs to a running kdamond.
+ * @kdamond: The kobject wrapper for the associated kdamond.
+ *
+ * Returns error if the sysfs input is wrong.
+ */
+static int damon_sysfs_commit_input(void *data)
+{
+ struct damon_sysfs_kdamond *kdamond = data;
+ struct damon_ctx *param_ctx, *test_ctx;
+ int err;
+
+ if (!damon_sysfs_kdamond_running(kdamond))
+ return -EINVAL;
+ /* TODO: Support multiple contexts per kdamond */
+ if (kdamond->contexts->nr != 1)
+ return -EINVAL;
+
+ param_ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]);
+ if (IS_ERR(param_ctx))
+ return PTR_ERR(param_ctx);
+ test_ctx = damon_sysfs_new_test_ctx(kdamond->damon_ctx);
+ if (!test_ctx)
+ return -ENOMEM;
+ err = damon_commit_ctx(test_ctx, param_ctx);
+ if (err)
+ goto out;
+ err = damon_commit_ctx(kdamond->damon_ctx, param_ctx);
+out:
+ damon_destroy_ctx(test_ctx);
+ damon_destroy_ctx(param_ctx);
+ return err;
+}
+
+static int damon_sysfs_commit_schemes_quota_goals(void *data)
+{
+ struct damon_sysfs_kdamond *sysfs_kdamond = data;
+ struct damon_ctx *ctx;
+ struct damon_sysfs_context *sysfs_ctx;
+
+ if (!damon_sysfs_kdamond_running(sysfs_kdamond))
+ return -EINVAL;
+ /* TODO: Support multiple contexts per kdamond */
+ if (sysfs_kdamond->contexts->nr != 1)
+ return -EINVAL;
+
+ ctx = sysfs_kdamond->damon_ctx;
+ sysfs_ctx = sysfs_kdamond->contexts->contexts_arr[0];
+ return damos_sysfs_set_quota_scores(sysfs_ctx->schemes, ctx);
+}
+
+/*
+ * damon_sysfs_upd_schemes_effective_quotas() - Update schemes effective quotas
+ * sysfs files.
+ * @data: The kobject wrapper that associated to the kdamond thread.
+ *
+ * This function reads the schemes' effective quotas of specific kdamond and
+ * update the related values for sysfs files. This function should be called
+ * from DAMON callbacks while holding ``damon_syfs_lock``, to safely access the
+ * DAMON contexts-internal data and DAMON sysfs variables.
+ */
+static int damon_sysfs_upd_schemes_effective_quotas(void *data)
+{
+ struct damon_sysfs_kdamond *kdamond = data;
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+
+ damos_sysfs_update_effective_quotas(
+ kdamond->contexts->contexts_arr[0]->schemes, ctx);
+ return 0;
+}
+
+static int damon_sysfs_upd_tuned_intervals(void *data)
+{
+ struct damon_sysfs_kdamond *kdamond = data;
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+
+ kdamond->contexts->contexts_arr[0]->attrs->intervals->sample_us =
+ ctx->attrs.sample_interval;
+ kdamond->contexts->contexts_arr[0]->attrs->intervals->aggr_us =
+ ctx->attrs.aggr_interval;
+ return 0;
+}
+
+static struct damon_ctx *damon_sysfs_build_ctx(
+ struct damon_sysfs_context *sys_ctx)
+{
+ struct damon_ctx *ctx = damon_new_ctx();
+ int err;
+
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ err = damon_sysfs_apply_inputs(ctx, sys_ctx);
+ if (err) {
+ damon_destroy_ctx(ctx);
+ return ERR_PTR(err);
+ }
+
+ return ctx;
+}
+
+static unsigned long damon_sysfs_next_update_jiffies;
+
+static int damon_sysfs_repeat_call_fn(void *data)
+{
+ struct damon_sysfs_kdamond *sysfs_kdamond = data;
+
+ if (!sysfs_kdamond->refresh_ms)
+ return 0;
+ if (time_before(jiffies, damon_sysfs_next_update_jiffies))
+ return 0;
+ damon_sysfs_next_update_jiffies = jiffies +
+ msecs_to_jiffies(sysfs_kdamond->refresh_ms);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return 0;
+ damon_sysfs_upd_tuned_intervals(sysfs_kdamond);
+ damon_sysfs_upd_schemes_stats(sysfs_kdamond);
+ damon_sysfs_upd_schemes_effective_quotas(sysfs_kdamond);
+ mutex_unlock(&damon_sysfs_lock);
+ return 0;
+}
+
+static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_ctx *ctx;
+ struct damon_call_control *repeat_call_control;
+ int err;
+
+ if (damon_sysfs_kdamond_running(kdamond))
+ return -EBUSY;
+ /* TODO: support multiple contexts per kdamond */
+ if (kdamond->contexts->nr != 1)
+ return -EINVAL;
+
+ if (kdamond->damon_ctx)
+ damon_destroy_ctx(kdamond->damon_ctx);
+ kdamond->damon_ctx = NULL;
+
+ repeat_call_control = kmalloc(sizeof(*repeat_call_control),
+ GFP_KERNEL);
+ if (!repeat_call_control)
+ return -ENOMEM;
+
+ ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]);
+ if (IS_ERR(ctx)) {
+ kfree(repeat_call_control);
+ return PTR_ERR(ctx);
+ }
+ err = damon_start(&ctx, 1, false);
+ if (err) {
+ kfree(repeat_call_control);
+ damon_destroy_ctx(ctx);
+ return err;
+ }
+ kdamond->damon_ctx = ctx;
+
+ damon_sysfs_next_update_jiffies =
+ jiffies + msecs_to_jiffies(kdamond->refresh_ms);
+
+ repeat_call_control->fn = damon_sysfs_repeat_call_fn;
+ repeat_call_control->data = kdamond;
+ repeat_call_control->repeat = true;
+ repeat_call_control->dealloc_on_cancel = true;
+ damon_call(ctx, repeat_call_control);
+ return err;
+}
+
+static int damon_sysfs_turn_damon_off(struct damon_sysfs_kdamond *kdamond)
+{
+ if (!kdamond->damon_ctx)
+ return -EINVAL;
+ return damon_stop(&kdamond->damon_ctx, 1);
+ /*
+ * To allow users show final monitoring results of already turned-off
+ * DAMON, we free kdamond->damon_ctx in next
+ * damon_sysfs_turn_damon_on(), or kdamonds_nr_store()
+ */
+}
+
+static int damon_sysfs_damon_call(int (*fn)(void *data),
+ struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_call_control call_control = {};
+ int err;
+
+ if (!kdamond->damon_ctx)
+ return -EINVAL;
+ call_control.fn = fn;
+ call_control.data = kdamond;
+ err = damon_call(kdamond->damon_ctx, &call_control);
+ return err ? err : call_control.return_code;
+}
+
+struct damon_sysfs_schemes_walk_data {
+ struct damon_sysfs_kdamond *sysfs_kdamond;
+ bool total_bytes_only;
+};
+
+/* populate the region directory */
+static void damon_sysfs_schemes_tried_regions_upd_one(void *data, struct damon_ctx *ctx,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *s, unsigned long sz_filter_passed)
+{
+ struct damon_sysfs_schemes_walk_data *walk_data = data;
+ struct damon_sysfs_kdamond *sysfs_kdamond = walk_data->sysfs_kdamond;
+
+ damos_sysfs_populate_region_dir(
+ sysfs_kdamond->contexts->contexts_arr[0]->schemes,
+ ctx, t, r, s, walk_data->total_bytes_only,
+ sz_filter_passed);
+}
+
+static int damon_sysfs_update_schemes_tried_regions(
+ struct damon_sysfs_kdamond *sysfs_kdamond, bool total_bytes_only)
+{
+ struct damon_sysfs_schemes_walk_data walk_data = {
+ .sysfs_kdamond = sysfs_kdamond,
+ .total_bytes_only = total_bytes_only,
+ };
+ struct damos_walk_control control = {
+ .walk_fn = damon_sysfs_schemes_tried_regions_upd_one,
+ .data = &walk_data,
+ };
+ struct damon_ctx *ctx = sysfs_kdamond->damon_ctx;
+
+ if (!ctx)
+ return -EINVAL;
+
+ damon_sysfs_schemes_clear_regions(
+ sysfs_kdamond->contexts->contexts_arr[0]->schemes);
+ return damos_walk(ctx, &control);
+}
+
+/*
+ * damon_sysfs_handle_cmd() - Handle a command for a specific kdamond.
+ * @cmd: The command to handle.
+ * @kdamond: The kobject wrapper for the associated kdamond.
+ *
+ * This function handles a DAMON sysfs command for a kdamond.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd,
+ struct damon_sysfs_kdamond *kdamond)
+{
+ switch (cmd) {
+ case DAMON_SYSFS_CMD_ON:
+ return damon_sysfs_turn_damon_on(kdamond);
+ case DAMON_SYSFS_CMD_OFF:
+ return damon_sysfs_turn_damon_off(kdamond);
+ case DAMON_SYSFS_CMD_COMMIT:
+ return damon_sysfs_damon_call(
+ damon_sysfs_commit_input, kdamond);
+ case DAMON_SYSFS_CMD_COMMIT_SCHEMES_QUOTA_GOALS:
+ return damon_sysfs_damon_call(
+ damon_sysfs_commit_schemes_quota_goals,
+ kdamond);
+ case DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS:
+ return damon_sysfs_damon_call(
+ damon_sysfs_upd_schemes_stats, kdamond);
+ case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES:
+ return damon_sysfs_update_schemes_tried_regions(kdamond, true);
+ case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS:
+ return damon_sysfs_update_schemes_tried_regions(kdamond, false);
+ case DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS:
+ return damon_sysfs_schemes_clear_regions(
+ kdamond->contexts->contexts_arr[0]->schemes);
+ case DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS:
+ return damon_sysfs_damon_call(
+ damon_sysfs_upd_schemes_effective_quotas,
+ kdamond);
+ case DAMON_SYSFS_CMD_UPDATE_TUNED_INTERVALS:
+ return damon_sysfs_damon_call(
+ damon_sysfs_upd_tuned_intervals, kdamond);
+ default:
+ return -EINVAL;
+ }
+}
+
+static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ enum damon_sysfs_cmd cmd;
+ ssize_t ret = -EINVAL;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ for (cmd = 0; cmd < NR_DAMON_SYSFS_CMDS; cmd++) {
+ if (sysfs_streq(buf, damon_sysfs_cmd_strs[cmd])) {
+ ret = damon_sysfs_handle_cmd(cmd, kdamond);
+ break;
+ }
+ }
+ mutex_unlock(&damon_sysfs_lock);
+ if (!ret)
+ ret = count;
+ return ret;
+}
+
+static ssize_t pid_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ struct damon_ctx *ctx;
+ int pid = -1;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ ctx = kdamond->damon_ctx;
+ if (!ctx)
+ goto out;
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond)
+ pid = ctx->kdamond->pid;
+ mutex_unlock(&ctx->kdamond_lock);
+out:
+ mutex_unlock(&damon_sysfs_lock);
+ return sysfs_emit(buf, "%d\n", pid);
+}
+
+static ssize_t refresh_ms_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+
+ return sysfs_emit(buf, "%u\n", kdamond->refresh_ms);
+}
+
+static ssize_t refresh_ms_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ unsigned int nr;
+ int err = kstrtouint(buf, 0, &nr);
+
+ if (err)
+ return err;
+
+ kdamond->refresh_ms = nr;
+ return count;
+}
+
+static void damon_sysfs_kdamond_release(struct kobject *kobj)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+
+ if (kdamond->damon_ctx)
+ damon_destroy_ctx(kdamond->damon_ctx);
+ kfree(kdamond);
+}
+
+static struct kobj_attribute damon_sysfs_kdamond_state_attr =
+ __ATTR_RW_MODE(state, 0600);
+
+static struct kobj_attribute damon_sysfs_kdamond_pid_attr =
+ __ATTR_RO_MODE(pid, 0400);
+
+static struct kobj_attribute damon_sysfs_kdamond_refresh_ms_attr =
+ __ATTR_RW_MODE(refresh_ms, 0600);
+
+static struct attribute *damon_sysfs_kdamond_attrs[] = {
+ &damon_sysfs_kdamond_state_attr.attr,
+ &damon_sysfs_kdamond_pid_attr.attr,
+ &damon_sysfs_kdamond_refresh_ms_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_kdamond);
+
+static const struct kobj_type damon_sysfs_kdamond_ktype = {
+ .release = damon_sysfs_kdamond_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_kdamond_groups,
+};
+
+/*
+ * kdamonds directory
+ */
+
+struct damon_sysfs_kdamonds {
+ struct kobject kobj;
+ struct damon_sysfs_kdamond **kdamonds_arr;
+ int nr;
+};
+
+static struct damon_sysfs_kdamonds *damon_sysfs_kdamonds_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_kdamonds), GFP_KERNEL);
+}
+
+static void damon_sysfs_kdamonds_rm_dirs(struct damon_sysfs_kdamonds *kdamonds)
+{
+ struct damon_sysfs_kdamond **kdamonds_arr = kdamonds->kdamonds_arr;
+ int i;
+
+ for (i = 0; i < kdamonds->nr; i++) {
+ damon_sysfs_kdamond_rm_dirs(kdamonds_arr[i]);
+ kobject_put(&kdamonds_arr[i]->kobj);
+ }
+ kdamonds->nr = 0;
+ kfree(kdamonds_arr);
+ kdamonds->kdamonds_arr = NULL;
+}
+
+static bool damon_sysfs_kdamonds_busy(struct damon_sysfs_kdamond **kdamonds,
+ int nr_kdamonds)
+{
+ int i;
+
+ for (i = 0; i < nr_kdamonds; i++) {
+ if (damon_sysfs_kdamond_running(kdamonds[i]))
+ return true;
+ }
+
+ return false;
+}
+
+static int damon_sysfs_kdamonds_add_dirs(struct damon_sysfs_kdamonds *kdamonds,
+ int nr_kdamonds)
+{
+ struct damon_sysfs_kdamond **kdamonds_arr, *kdamond;
+ int err, i;
+
+ if (damon_sysfs_kdamonds_busy(kdamonds->kdamonds_arr, kdamonds->nr))
+ return -EBUSY;
+
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ if (!nr_kdamonds)
+ return 0;
+
+ kdamonds_arr = kmalloc_array(nr_kdamonds, sizeof(*kdamonds_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!kdamonds_arr)
+ return -ENOMEM;
+ kdamonds->kdamonds_arr = kdamonds_arr;
+
+ for (i = 0; i < nr_kdamonds; i++) {
+ kdamond = damon_sysfs_kdamond_alloc();
+ if (!kdamond) {
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&kdamond->kobj,
+ &damon_sysfs_kdamond_ktype, &kdamonds->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_kdamond_add_dirs(kdamond);
+ if (err)
+ goto out;
+
+ kdamonds_arr[i] = kdamond;
+ kdamonds->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ kobject_put(&kdamond->kobj);
+ return err;
+}
+
+static ssize_t nr_kdamonds_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_kdamonds *kdamonds = container_of(kobj,
+ struct damon_sysfs_kdamonds, kobj);
+
+ return sysfs_emit(buf, "%d\n", kdamonds->nr);
+}
+
+static ssize_t nr_kdamonds_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_kdamonds *kdamonds;
+ int nr, err;
+
+ err = kstrtoint(buf, 0, &nr);
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ kdamonds = container_of(kobj, struct damon_sysfs_kdamonds, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_kdamonds_add_dirs(kdamonds, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_kdamonds_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_kdamonds, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_kdamonds_nr_attr =
+ __ATTR_RW_MODE(nr_kdamonds, 0600);
+
+static struct attribute *damon_sysfs_kdamonds_attrs[] = {
+ &damon_sysfs_kdamonds_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_kdamonds);
+
+static const struct kobj_type damon_sysfs_kdamonds_ktype = {
+ .release = damon_sysfs_kdamonds_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_kdamonds_groups,
+};
+
+/*
+ * damon user interface directory
+ */
+
+struct damon_sysfs_ui_dir {
+ struct kobject kobj;
+ struct damon_sysfs_kdamonds *kdamonds;
+};
+
+static struct damon_sysfs_ui_dir *damon_sysfs_ui_dir_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_ui_dir), GFP_KERNEL);
+}
+
+static int damon_sysfs_ui_dir_add_dirs(struct damon_sysfs_ui_dir *ui_dir)
+{
+ struct damon_sysfs_kdamonds *kdamonds;
+ int err;
+
+ kdamonds = damon_sysfs_kdamonds_alloc();
+ if (!kdamonds)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&kdamonds->kobj,
+ &damon_sysfs_kdamonds_ktype, &ui_dir->kobj,
+ "kdamonds");
+ if (err) {
+ kobject_put(&kdamonds->kobj);
+ return err;
+ }
+ ui_dir->kdamonds = kdamonds;
+ return err;
+}
+
+static void damon_sysfs_ui_dir_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_ui_dir, kobj));
+}
+
+static struct attribute *damon_sysfs_ui_dir_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_ui_dir);
+
+static const struct kobj_type damon_sysfs_ui_dir_ktype = {
+ .release = damon_sysfs_ui_dir_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_ui_dir_groups,
+};
+
+static int __init damon_sysfs_init(void)
+{
+ struct kobject *damon_sysfs_root;
+ struct damon_sysfs_ui_dir *admin;
+ int err;
+
+ damon_sysfs_root = kobject_create_and_add("damon", mm_kobj);
+ if (!damon_sysfs_root)
+ return -ENOMEM;
+
+ admin = damon_sysfs_ui_dir_alloc();
+ if (!admin) {
+ kobject_put(damon_sysfs_root);
+ return -ENOMEM;
+ }
+ err = kobject_init_and_add(&admin->kobj, &damon_sysfs_ui_dir_ktype,
+ damon_sysfs_root, "admin");
+ if (err)
+ goto out;
+ err = damon_sysfs_ui_dir_add_dirs(admin);
+ if (err)
+ goto out;
+ return 0;
+
+out:
+ kobject_put(&admin->kobj);
+ kobject_put(damon_sysfs_root);
+ return err;
+}
+subsys_initcall(damon_sysfs_init);
+
+#include "tests/sysfs-kunit.h"
diff --git a/mm/damon/tests/.kunitconfig b/mm/damon/tests/.kunitconfig
new file mode 100644
index 000000000000..36a450f57b58
--- /dev/null
+++ b/mm/damon/tests/.kunitconfig
@@ -0,0 +1,15 @@
+# for DAMON core
+CONFIG_KUNIT=y
+CONFIG_DAMON=y
+CONFIG_DAMON_KUNIT_TEST=y
+
+# for DAMON vaddr ops
+CONFIG_MMU=y
+CONFIG_PAGE_IDLE_FLAG=y
+CONFIG_DAMON_VADDR=y
+CONFIG_DAMON_VADDR_KUNIT_TEST=y
+
+# for DAMON sysfs interface
+CONFIG_SYSFS=y
+CONFIG_DAMON_SYSFS=y
+CONFIG_DAMON_SYSFS_KUNIT_TEST=y
diff --git a/mm/damon/tests/core-kunit.h b/mm/damon/tests/core-kunit.h
new file mode 100644
index 000000000000..a1eff023e928
--- /dev/null
+++ b/mm/damon/tests/core-kunit.h
@@ -0,0 +1,1249 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#ifdef CONFIG_DAMON_KUNIT_TEST
+
+#ifndef _DAMON_CORE_TEST_H
+#define _DAMON_CORE_TEST_H
+
+#include <kunit/test.h>
+
+static void damon_test_regions(struct kunit *test)
+{
+ struct damon_region *r;
+ struct damon_target *t;
+
+ r = damon_new_region(1, 2);
+ if (!r)
+ kunit_skip(test, "region alloc fail");
+ KUNIT_EXPECT_EQ(test, 1ul, r->ar.start);
+ KUNIT_EXPECT_EQ(test, 2ul, r->ar.end);
+ KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+
+ t = damon_new_target();
+ if (!t) {
+ damon_free_region(r);
+ kunit_skip(test, "target alloc fail");
+ }
+ KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t));
+
+ damon_add_region(r, t);
+ KUNIT_EXPECT_EQ(test, 1u, damon_nr_regions(t));
+
+ damon_destroy_region(r, t);
+ KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t));
+
+ damon_free_target(t);
+}
+
+static unsigned int nr_damon_targets(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ unsigned int nr_targets = 0;
+
+ damon_for_each_target(t, ctx)
+ nr_targets++;
+
+ return nr_targets;
+}
+
+static void damon_test_target(struct kunit *test)
+{
+ struct damon_ctx *c = damon_new_ctx();
+ struct damon_target *t;
+
+ if (!c)
+ kunit_skip(test, "ctx alloc fail");
+
+ t = damon_new_target();
+ if (!t) {
+ damon_destroy_ctx(c);
+ kunit_skip(test, "target alloc fail");
+ }
+ KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c));
+
+ damon_add_target(c, t);
+ KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(c));
+
+ damon_destroy_target(t, c);
+ KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c));
+
+ damon_destroy_ctx(c);
+}
+
+/*
+ * Test kdamond_reset_aggregated()
+ *
+ * DAMON checks access to each region and aggregates this information as the
+ * access frequency of each region. In detail, it increases '->nr_accesses' of
+ * regions that an access has confirmed. 'kdamond_reset_aggregated()' flushes
+ * the aggregated information ('->nr_accesses' of each regions) to the result
+ * buffer. As a result of the flushing, the '->nr_accesses' of regions are
+ * initialized to zero.
+ */
+static void damon_test_aggregate(struct kunit *test)
+{
+ struct damon_ctx *ctx = damon_new_ctx();
+ unsigned long saddr[][3] = {{10, 20, 30}, {5, 42, 49}, {13, 33, 55} };
+ unsigned long eaddr[][3] = {{15, 27, 40}, {31, 45, 55}, {23, 44, 66} };
+ unsigned long accesses[][3] = {{42, 95, 84}, {10, 20, 30}, {0, 1, 2} };
+ struct damon_target *t;
+ struct damon_region *r;
+ int it, ir;
+
+ if (!ctx)
+ kunit_skip(test, "ctx alloc fail");
+
+ for (it = 0; it < 3; it++) {
+ t = damon_new_target();
+ if (!t) {
+ damon_destroy_ctx(ctx);
+ kunit_skip(test, "target alloc fail");
+ }
+ damon_add_target(ctx, t);
+ }
+
+ it = 0;
+ damon_for_each_target(t, ctx) {
+ for (ir = 0; ir < 3; ir++) {
+ r = damon_new_region(saddr[it][ir], eaddr[it][ir]);
+ if (!r) {
+ damon_destroy_ctx(ctx);
+ kunit_skip(test, "region alloc fail");
+ }
+ r->nr_accesses = accesses[it][ir];
+ r->nr_accesses_bp = accesses[it][ir] * 10000;
+ damon_add_region(r, t);
+ }
+ it++;
+ }
+ kdamond_reset_aggregated(ctx);
+ it = 0;
+ damon_for_each_target(t, ctx) {
+ ir = 0;
+ /* '->nr_accesses' should be zeroed */
+ damon_for_each_region(r, t) {
+ KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+ ir++;
+ }
+ /* regions should be preserved */
+ KUNIT_EXPECT_EQ(test, 3, ir);
+ it++;
+ }
+ /* targets also should be preserved */
+ KUNIT_EXPECT_EQ(test, 3, it);
+
+ damon_destroy_ctx(ctx);
+}
+
+static void damon_test_split_at(struct kunit *test)
+{
+ struct damon_target *t;
+ struct damon_region *r, *r_new;
+
+ t = damon_new_target();
+ if (!t)
+ kunit_skip(test, "target alloc fail");
+ r = damon_new_region(0, 100);
+ if (!r) {
+ damon_free_target(t);
+ kunit_skip(test, "region alloc fail");
+ }
+ r->nr_accesses_bp = 420000;
+ r->nr_accesses = 42;
+ r->last_nr_accesses = 15;
+ damon_add_region(r, t);
+ damon_split_region_at(t, r, 25);
+ KUNIT_EXPECT_EQ(test, r->ar.start, 0ul);
+ KUNIT_EXPECT_EQ(test, r->ar.end, 25ul);
+
+ r_new = damon_next_region(r);
+ KUNIT_EXPECT_EQ(test, r_new->ar.start, 25ul);
+ KUNIT_EXPECT_EQ(test, r_new->ar.end, 100ul);
+
+ KUNIT_EXPECT_EQ(test, r->nr_accesses_bp, r_new->nr_accesses_bp);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, r_new->nr_accesses);
+ KUNIT_EXPECT_EQ(test, r->last_nr_accesses, r_new->last_nr_accesses);
+
+ damon_free_target(t);
+}
+
+static void damon_test_merge_two(struct kunit *test)
+{
+ struct damon_target *t;
+ struct damon_region *r, *r2, *r3;
+ int i;
+
+ t = damon_new_target();
+ if (!t)
+ kunit_skip(test, "target alloc fail");
+ r = damon_new_region(0, 100);
+ if (!r) {
+ damon_free_target(t);
+ kunit_skip(test, "region alloc fail");
+ }
+ r->nr_accesses = 10;
+ r->nr_accesses_bp = 100000;
+ damon_add_region(r, t);
+ r2 = damon_new_region(100, 300);
+ if (!r2) {
+ damon_free_target(t);
+ kunit_skip(test, "second region alloc fail");
+ }
+ r2->nr_accesses = 20;
+ r2->nr_accesses_bp = 200000;
+ damon_add_region(r2, t);
+
+ damon_merge_two_regions(t, r, r2);
+ KUNIT_EXPECT_EQ(test, r->ar.start, 0ul);
+ KUNIT_EXPECT_EQ(test, r->ar.end, 300ul);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, 16u);
+
+ i = 0;
+ damon_for_each_region(r3, t) {
+ KUNIT_EXPECT_PTR_EQ(test, r, r3);
+ i++;
+ }
+ KUNIT_EXPECT_EQ(test, i, 1);
+
+ damon_free_target(t);
+}
+
+static struct damon_region *__nth_region_of(struct damon_target *t, int idx)
+{
+ struct damon_region *r;
+ unsigned int i = 0;
+
+ damon_for_each_region(r, t) {
+ if (i++ == idx)
+ return r;
+ }
+
+ return NULL;
+}
+
+static void damon_test_merge_regions_of(struct kunit *test)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned long sa[] = {0, 100, 114, 122, 130, 156, 170, 184};
+ unsigned long ea[] = {100, 112, 122, 130, 156, 170, 184, 230};
+ unsigned int nrs[] = {0, 0, 10, 10, 20, 30, 1, 2};
+
+ unsigned long saddrs[] = {0, 114, 130, 156, 170};
+ unsigned long eaddrs[] = {112, 130, 156, 170, 230};
+ int i;
+
+ t = damon_new_target();
+ if (!t)
+ kunit_skip(test, "target alloc fail");
+ for (i = 0; i < ARRAY_SIZE(sa); i++) {
+ r = damon_new_region(sa[i], ea[i]);
+ if (!r) {
+ damon_free_target(t);
+ kunit_skip(test, "region alloc fail");
+ }
+ r->nr_accesses = nrs[i];
+ r->nr_accesses_bp = nrs[i] * 10000;
+ damon_add_region(r, t);
+ }
+
+ damon_merge_regions_of(t, 9, 9999);
+ /* 0-112, 114-130, 130-156, 156-170 */
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u);
+ for (i = 0; i < 5; i++) {
+ r = __nth_region_of(t, i);
+ KUNIT_EXPECT_EQ(test, r->ar.start, saddrs[i]);
+ KUNIT_EXPECT_EQ(test, r->ar.end, eaddrs[i]);
+ }
+ damon_free_target(t);
+}
+
+static void damon_test_split_regions_of(struct kunit *test)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+
+ t = damon_new_target();
+ if (!t)
+ kunit_skip(test, "target alloc fail");
+ r = damon_new_region(0, 22);
+ if (!r) {
+ damon_free_target(t);
+ kunit_skip(test, "region alloc fail");
+ }
+ damon_add_region(r, t);
+ damon_split_regions_of(t, 2, 1);
+ KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u);
+ damon_free_target(t);
+
+ t = damon_new_target();
+ if (!t)
+ kunit_skip(test, "second target alloc fail");
+ r = damon_new_region(0, 220);
+ if (!r) {
+ damon_free_target(t);
+ kunit_skip(test, "second region alloc fail");
+ }
+ damon_add_region(r, t);
+ damon_split_regions_of(t, 4, 1);
+ KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u);
+ damon_free_target(t);
+}
+
+static void damon_test_ops_registration(struct kunit *test)
+{
+ struct damon_ctx *c = damon_new_ctx();
+ struct damon_operations ops = {.id = DAMON_OPS_VADDR}, bak;
+ bool need_cleanup = false;
+
+ if (!c)
+ kunit_skip(test, "ctx alloc fail");
+
+ /* DAMON_OPS_VADDR is registered only if CONFIG_DAMON_VADDR is set */
+ if (!damon_is_registered_ops(DAMON_OPS_VADDR)) {
+ bak.id = DAMON_OPS_VADDR;
+ KUNIT_EXPECT_EQ(test, damon_register_ops(&bak), 0);
+ need_cleanup = true;
+ }
+
+ /* DAMON_OPS_VADDR is ensured to be registered */
+ KUNIT_EXPECT_EQ(test, damon_select_ops(c, DAMON_OPS_VADDR), 0);
+
+ /* Double-registration is prohibited */
+ KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), -EINVAL);
+
+ /* Unknown ops id cannot be registered */
+ KUNIT_EXPECT_EQ(test, damon_select_ops(c, NR_DAMON_OPS), -EINVAL);
+
+ /* Registration should success after unregistration */
+ mutex_lock(&damon_ops_lock);
+ bak = damon_registered_ops[DAMON_OPS_VADDR];
+ damon_registered_ops[DAMON_OPS_VADDR] = (struct damon_operations){};
+ mutex_unlock(&damon_ops_lock);
+
+ ops.id = DAMON_OPS_VADDR;
+ KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), 0);
+
+ mutex_lock(&damon_ops_lock);
+ damon_registered_ops[DAMON_OPS_VADDR] = bak;
+ mutex_unlock(&damon_ops_lock);
+
+ /* Check double-registration failure again */
+ KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), -EINVAL);
+
+ damon_destroy_ctx(c);
+
+ if (need_cleanup) {
+ mutex_lock(&damon_ops_lock);
+ damon_registered_ops[DAMON_OPS_VADDR] =
+ (struct damon_operations){};
+ mutex_unlock(&damon_ops_lock);
+ }
+}
+
+static void damon_test_set_regions(struct kunit *test)
+{
+ struct damon_target *t = damon_new_target();
+ struct damon_region *r1, *r2;
+ struct damon_addr_range range = {.start = 8, .end = 28};
+ unsigned long expects[] = {8, 16, 16, 24, 24, 28};
+ int expect_idx = 0;
+ struct damon_region *r;
+
+ if (!t)
+ kunit_skip(test, "target alloc fail");
+ r1 = damon_new_region(4, 16);
+ if (!r1) {
+ damon_free_target(t);
+ kunit_skip(test, "region alloc fail");
+ }
+ r2 = damon_new_region(24, 32);
+ if (!r2) {
+ damon_free_target(t);
+ damon_free_region(r1);
+ kunit_skip(test, "second region alloc fail");
+ }
+
+ damon_add_region(r1, t);
+ damon_add_region(r2, t);
+ damon_set_regions(t, &range, 1, 1);
+
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 3);
+ damon_for_each_region(r, t) {
+ KUNIT_EXPECT_EQ(test, r->ar.start, expects[expect_idx++]);
+ KUNIT_EXPECT_EQ(test, r->ar.end, expects[expect_idx++]);
+ }
+ damon_destroy_target(t, NULL);
+}
+
+static void damon_test_nr_accesses_to_accesses_bp(struct kunit *test)
+{
+ struct damon_attrs attrs = {
+ .sample_interval = 10,
+ .aggr_interval = ((unsigned long)UINT_MAX + 1) * 10
+ };
+
+ /*
+ * In some cases such as 32bit architectures where UINT_MAX is
+ * ULONG_MAX, attrs.aggr_interval becomes zero. Calling
+ * damon_nr_accesses_to_accesses_bp() in the case will cause
+ * divide-by-zero. Such case is prohibited in normal execution since
+ * the caution is documented on the comment for the function, and
+ * damon_update_monitoring_results() does the check. Skip the test in
+ * the case.
+ */
+ if (!attrs.aggr_interval)
+ kunit_skip(test, "aggr_interval is zero.");
+
+ KUNIT_EXPECT_EQ(test, damon_nr_accesses_to_accesses_bp(123, &attrs), 0);
+}
+
+static void damon_test_update_monitoring_result(struct kunit *test)
+{
+ struct damon_attrs old_attrs = {
+ .sample_interval = 10, .aggr_interval = 1000,};
+ struct damon_attrs new_attrs;
+ struct damon_region *r = damon_new_region(3, 7);
+
+ if (!r)
+ kunit_skip(test, "region alloc fail");
+
+ r->nr_accesses = 15;
+ r->nr_accesses_bp = 150000;
+ r->age = 20;
+
+ new_attrs = (struct damon_attrs){
+ .sample_interval = 100, .aggr_interval = 10000,};
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs, false);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, 15);
+ KUNIT_EXPECT_EQ(test, r->age, 2);
+
+ new_attrs = (struct damon_attrs){
+ .sample_interval = 1, .aggr_interval = 1000};
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs, false);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, 150);
+ KUNIT_EXPECT_EQ(test, r->age, 2);
+
+ new_attrs = (struct damon_attrs){
+ .sample_interval = 1, .aggr_interval = 100};
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs, false);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, 150);
+ KUNIT_EXPECT_EQ(test, r->age, 20);
+
+ damon_free_region(r);
+}
+
+static void damon_test_set_attrs(struct kunit *test)
+{
+ struct damon_ctx *c = damon_new_ctx();
+ struct damon_attrs valid_attrs = {
+ .min_nr_regions = 10, .max_nr_regions = 1000,
+ .sample_interval = 5000, .aggr_interval = 100000,};
+ struct damon_attrs invalid_attrs;
+
+ if (!c)
+ kunit_skip(test, "ctx alloc fail");
+
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &valid_attrs), 0);
+
+ invalid_attrs = valid_attrs;
+ invalid_attrs.min_nr_regions = 1;
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
+
+ invalid_attrs = valid_attrs;
+ invalid_attrs.max_nr_regions = 9;
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
+
+ invalid_attrs = valid_attrs;
+ invalid_attrs.aggr_interval = 4999;
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
+
+ damon_destroy_ctx(c);
+}
+
+static void damon_test_moving_sum(struct kunit *test)
+{
+ unsigned int mvsum = 50000, nomvsum = 50000, len_window = 10;
+ unsigned int new_values[] = {10000, 0, 10000, 0, 0, 0, 10000, 0, 0, 0};
+ unsigned int expects[] = {55000, 50000, 55000, 50000, 45000, 40000,
+ 45000, 40000, 35000, 30000};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(new_values); i++) {
+ mvsum = damon_moving_sum(mvsum, nomvsum, len_window,
+ new_values[i]);
+ KUNIT_EXPECT_EQ(test, mvsum, expects[i]);
+ }
+}
+
+static void damos_test_new_filter(struct kunit *test)
+{
+ struct damos_filter *filter;
+
+ filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true, false);
+ if (!filter)
+ kunit_skip(test, "filter alloc fail");
+ KUNIT_EXPECT_EQ(test, filter->type, DAMOS_FILTER_TYPE_ANON);
+ KUNIT_EXPECT_EQ(test, filter->matching, true);
+ KUNIT_EXPECT_PTR_EQ(test, filter->list.prev, &filter->list);
+ KUNIT_EXPECT_PTR_EQ(test, filter->list.next, &filter->list);
+ damos_destroy_filter(filter);
+}
+
+static void damos_test_commit_quota_goal_for(struct kunit *test,
+ struct damos_quota_goal *dst,
+ struct damos_quota_goal *src)
+{
+ u64 dst_last_psi_total = 0;
+
+ if (dst->metric == DAMOS_QUOTA_SOME_MEM_PSI_US)
+ dst_last_psi_total = dst->last_psi_total;
+ damos_commit_quota_goal(dst, src);
+
+ KUNIT_EXPECT_EQ(test, dst->metric, src->metric);
+ KUNIT_EXPECT_EQ(test, dst->target_value, src->target_value);
+ if (src->metric == DAMOS_QUOTA_USER_INPUT)
+ KUNIT_EXPECT_EQ(test, dst->current_value, src->current_value);
+ if (dst_last_psi_total && src->metric == DAMOS_QUOTA_SOME_MEM_PSI_US)
+ KUNIT_EXPECT_EQ(test, dst->last_psi_total, dst_last_psi_total);
+ switch (dst->metric) {
+ case DAMOS_QUOTA_NODE_MEM_USED_BP:
+ case DAMOS_QUOTA_NODE_MEM_FREE_BP:
+ KUNIT_EXPECT_EQ(test, dst->nid, src->nid);
+ break;
+ case DAMOS_QUOTA_NODE_MEMCG_USED_BP:
+ case DAMOS_QUOTA_NODE_MEMCG_FREE_BP:
+ KUNIT_EXPECT_EQ(test, dst->nid, src->nid);
+ KUNIT_EXPECT_EQ(test, dst->memcg_id, src->memcg_id);
+ break;
+ default:
+ break;
+ }
+}
+
+static void damos_test_commit_quota_goal(struct kunit *test)
+{
+ struct damos_quota_goal dst = {
+ .metric = DAMOS_QUOTA_SOME_MEM_PSI_US,
+ .target_value = 1000,
+ .current_value = 123,
+ .last_psi_total = 456,
+ };
+
+ damos_test_commit_quota_goal_for(test, &dst,
+ &(struct damos_quota_goal){
+ .metric = DAMOS_QUOTA_USER_INPUT,
+ .target_value = 789,
+ .current_value = 12});
+ damos_test_commit_quota_goal_for(test, &dst,
+ &(struct damos_quota_goal){
+ .metric = DAMOS_QUOTA_NODE_MEM_FREE_BP,
+ .target_value = 345,
+ .current_value = 678,
+ .nid = 9,
+ });
+ damos_test_commit_quota_goal_for(test, &dst,
+ &(struct damos_quota_goal){
+ .metric = DAMOS_QUOTA_NODE_MEM_USED_BP,
+ .target_value = 12,
+ .current_value = 345,
+ .nid = 6,
+ });
+ damos_test_commit_quota_goal_for(test, &dst,
+ &(struct damos_quota_goal){
+ .metric = DAMOS_QUOTA_NODE_MEMCG_USED_BP,
+ .target_value = 456,
+ .current_value = 567,
+ .nid = 6,
+ .memcg_id = 7,
+ });
+ damos_test_commit_quota_goal_for(test, &dst,
+ &(struct damos_quota_goal){
+ .metric = DAMOS_QUOTA_NODE_MEMCG_FREE_BP,
+ .target_value = 890,
+ .current_value = 901,
+ .nid = 10,
+ .memcg_id = 1,
+ });
+ damos_test_commit_quota_goal_for(test, &dst,
+ &(struct damos_quota_goal) {
+ .metric = DAMOS_QUOTA_USER_INPUT,
+ .target_value = 789,
+ .current_value = 12,
+ });
+}
+
+static void damos_test_commit_quota_goals_for(struct kunit *test,
+ struct damos_quota_goal *dst_goals, int nr_dst_goals,
+ struct damos_quota_goal *src_goals, int nr_src_goals)
+{
+ struct damos_quota dst, src;
+ struct damos_quota_goal *goal, *next;
+ bool skip = true;
+ int i;
+
+ INIT_LIST_HEAD(&dst.goals);
+ INIT_LIST_HEAD(&src.goals);
+
+ for (i = 0; i < nr_dst_goals; i++) {
+ /*
+ * When nr_src_goals is smaller than dst_goals,
+ * damos_commit_quota_goals() will kfree() the dst goals.
+ * Make it kfree()-able.
+ */
+ goal = damos_new_quota_goal(dst_goals[i].metric,
+ dst_goals[i].target_value);
+ if (!goal)
+ goto out;
+ damos_add_quota_goal(&dst, goal);
+ }
+ skip = false;
+ for (i = 0; i < nr_src_goals; i++)
+ damos_add_quota_goal(&src, &src_goals[i]);
+
+ damos_commit_quota_goals(&dst, &src);
+
+ i = 0;
+ damos_for_each_quota_goal(goal, (&dst)) {
+ KUNIT_EXPECT_EQ(test, goal->metric, src_goals[i].metric);
+ KUNIT_EXPECT_EQ(test, goal->target_value,
+ src_goals[i++].target_value);
+ }
+ KUNIT_EXPECT_EQ(test, i, nr_src_goals);
+
+out:
+ damos_for_each_quota_goal_safe(goal, next, (&dst))
+ damos_destroy_quota_goal(goal);
+ if (skip)
+ kunit_skip(test, "goal alloc fail");
+}
+
+static void damos_test_commit_quota_goals(struct kunit *test)
+{
+ damos_test_commit_quota_goals_for(test,
+ (struct damos_quota_goal[]){}, 0,
+ (struct damos_quota_goal[]){
+ {
+ .metric = DAMOS_QUOTA_USER_INPUT,
+ .target_value = 123,
+ },
+ }, 1);
+ damos_test_commit_quota_goals_for(test,
+ (struct damos_quota_goal[]){
+ {
+ .metric = DAMOS_QUOTA_USER_INPUT,
+ .target_value = 234,
+ },
+
+ }, 1,
+ (struct damos_quota_goal[]){
+ {
+ .metric = DAMOS_QUOTA_USER_INPUT,
+ .target_value = 345,
+ },
+ }, 1);
+ damos_test_commit_quota_goals_for(test,
+ (struct damos_quota_goal[]){
+ {
+ .metric = DAMOS_QUOTA_USER_INPUT,
+ .target_value = 456,
+ },
+
+ }, 1,
+ (struct damos_quota_goal[]){}, 0);
+}
+
+static void damos_test_commit_quota(struct kunit *test)
+{
+ struct damos_quota dst = {
+ .reset_interval = 1,
+ .ms = 2,
+ .sz = 3,
+ .weight_sz = 4,
+ .weight_nr_accesses = 5,
+ .weight_age = 6,
+ };
+ struct damos_quota src = {
+ .reset_interval = 7,
+ .ms = 8,
+ .sz = 9,
+ .weight_sz = 10,
+ .weight_nr_accesses = 11,
+ .weight_age = 12,
+ };
+
+ INIT_LIST_HEAD(&dst.goals);
+ INIT_LIST_HEAD(&src.goals);
+
+ damos_commit_quota(&dst, &src);
+
+ KUNIT_EXPECT_EQ(test, dst.reset_interval, src.reset_interval);
+ KUNIT_EXPECT_EQ(test, dst.ms, src.ms);
+ KUNIT_EXPECT_EQ(test, dst.sz, src.sz);
+ KUNIT_EXPECT_EQ(test, dst.weight_sz, src.weight_sz);
+ KUNIT_EXPECT_EQ(test, dst.weight_nr_accesses, src.weight_nr_accesses);
+ KUNIT_EXPECT_EQ(test, dst.weight_age, src.weight_age);
+}
+
+static int damos_test_help_dests_setup(struct damos_migrate_dests *dests,
+ unsigned int *node_id_arr, unsigned int *weight_arr,
+ size_t nr_dests)
+{
+ size_t i;
+
+ dests->node_id_arr = kmalloc_array(nr_dests,
+ sizeof(*dests->node_id_arr), GFP_KERNEL);
+ if (!dests->node_id_arr)
+ return -ENOMEM;
+ dests->weight_arr = kmalloc_array(nr_dests,
+ sizeof(*dests->weight_arr), GFP_KERNEL);
+ if (!dests->weight_arr) {
+ kfree(dests->node_id_arr);
+ dests->node_id_arr = NULL;
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nr_dests; i++) {
+ dests->node_id_arr[i] = node_id_arr[i];
+ dests->weight_arr[i] = weight_arr[i];
+ }
+ dests->nr_dests = nr_dests;
+ return 0;
+}
+
+static void damos_test_help_dests_free(struct damos_migrate_dests *dests)
+{
+ kfree(dests->node_id_arr);
+ kfree(dests->weight_arr);
+}
+
+static void damos_test_commit_dests_for(struct kunit *test,
+ unsigned int *dst_node_id_arr, unsigned int *dst_weight_arr,
+ size_t dst_nr_dests,
+ unsigned int *src_node_id_arr, unsigned int *src_weight_arr,
+ size_t src_nr_dests)
+{
+ struct damos_migrate_dests dst = {}, src = {};
+ int i, err;
+ bool skip = true;
+
+ err = damos_test_help_dests_setup(&dst, dst_node_id_arr,
+ dst_weight_arr, dst_nr_dests);
+ if (err)
+ kunit_skip(test, "dests setup fail");
+ err = damos_test_help_dests_setup(&src, src_node_id_arr,
+ src_weight_arr, src_nr_dests);
+ if (err) {
+ damos_test_help_dests_free(&dst);
+ kunit_skip(test, "src setup fail");
+ }
+ err = damos_commit_dests(&dst, &src);
+ if (err)
+ goto out;
+ skip = false;
+
+ KUNIT_EXPECT_EQ(test, dst.nr_dests, src_nr_dests);
+ for (i = 0; i < dst.nr_dests; i++) {
+ KUNIT_EXPECT_EQ(test, dst.node_id_arr[i], src_node_id_arr[i]);
+ KUNIT_EXPECT_EQ(test, dst.weight_arr[i], src_weight_arr[i]);
+ }
+
+out:
+ damos_test_help_dests_free(&dst);
+ damos_test_help_dests_free(&src);
+ if (skip)
+ kunit_skip(test, "skip");
+}
+
+static void damos_test_commit_dests(struct kunit *test)
+{
+ damos_test_commit_dests_for(test,
+ (unsigned int[]){1, 2, 3}, (unsigned int[]){2, 3, 4},
+ 3,
+ (unsigned int[]){4, 5, 6}, (unsigned int[]){5, 6, 7},
+ 3);
+ damos_test_commit_dests_for(test,
+ (unsigned int[]){1, 2}, (unsigned int[]){2, 3},
+ 2,
+ (unsigned int[]){4, 5, 6}, (unsigned int[]){5, 6, 7},
+ 3);
+ damos_test_commit_dests_for(test,
+ NULL, NULL, 0,
+ (unsigned int[]){4, 5, 6}, (unsigned int[]){5, 6, 7},
+ 3);
+ damos_test_commit_dests_for(test,
+ (unsigned int[]){1, 2, 3}, (unsigned int[]){2, 3, 4},
+ 3,
+ (unsigned int[]){4, 5}, (unsigned int[]){5, 6}, 2);
+ damos_test_commit_dests_for(test,
+ (unsigned int[]){1, 2, 3}, (unsigned int[]){2, 3, 4},
+ 3,
+ NULL, NULL, 0);
+}
+
+static void damos_test_commit_filter_for(struct kunit *test,
+ struct damos_filter *dst, struct damos_filter *src)
+{
+ damos_commit_filter(dst, src);
+ KUNIT_EXPECT_EQ(test, dst->type, src->type);
+ KUNIT_EXPECT_EQ(test, dst->matching, src->matching);
+ KUNIT_EXPECT_EQ(test, dst->allow, src->allow);
+ switch (src->type) {
+ case DAMOS_FILTER_TYPE_MEMCG:
+ KUNIT_EXPECT_EQ(test, dst->memcg_id, src->memcg_id);
+ break;
+ case DAMOS_FILTER_TYPE_ADDR:
+ KUNIT_EXPECT_EQ(test, dst->addr_range.start,
+ src->addr_range.start);
+ KUNIT_EXPECT_EQ(test, dst->addr_range.end,
+ src->addr_range.end);
+ break;
+ case DAMOS_FILTER_TYPE_TARGET:
+ KUNIT_EXPECT_EQ(test, dst->target_idx, src->target_idx);
+ break;
+ case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
+ KUNIT_EXPECT_EQ(test, dst->sz_range.min, src->sz_range.min);
+ KUNIT_EXPECT_EQ(test, dst->sz_range.max, src->sz_range.max);
+ break;
+ default:
+ break;
+ }
+}
+
+static void damos_test_commit_filter(struct kunit *test)
+{
+ struct damos_filter dst = {
+ .type = DAMOS_FILTER_TYPE_ACTIVE,
+ .matching = false,
+ .allow = false,
+ };
+
+ damos_test_commit_filter_for(test, &dst,
+ &(struct damos_filter){
+ .type = DAMOS_FILTER_TYPE_ANON,
+ .matching = true,
+ .allow = true,
+ });
+ damos_test_commit_filter_for(test, &dst,
+ &(struct damos_filter){
+ .type = DAMOS_FILTER_TYPE_MEMCG,
+ .matching = false,
+ .allow = false,
+ .memcg_id = 123,
+ });
+ damos_test_commit_filter_for(test, &dst,
+ &(struct damos_filter){
+ .type = DAMOS_FILTER_TYPE_YOUNG,
+ .matching = true,
+ .allow = true,
+ });
+ damos_test_commit_filter_for(test, &dst,
+ &(struct damos_filter){
+ .type = DAMOS_FILTER_TYPE_HUGEPAGE_SIZE,
+ .matching = false,
+ .allow = false,
+ .sz_range = {.min = 234, .max = 345},
+ });
+ damos_test_commit_filter_for(test, &dst,
+ &(struct damos_filter){
+ .type = DAMOS_FILTER_TYPE_UNMAPPED,
+ .matching = true,
+ .allow = true,
+ });
+ damos_test_commit_filter_for(test, &dst,
+ &(struct damos_filter){
+ .type = DAMOS_FILTER_TYPE_ADDR,
+ .matching = false,
+ .allow = false,
+ .addr_range = {.start = 456, .end = 567},
+ });
+ damos_test_commit_filter_for(test, &dst,
+ &(struct damos_filter){
+ .type = DAMOS_FILTER_TYPE_TARGET,
+ .matching = true,
+ .allow = true,
+ .target_idx = 6,
+ });
+}
+
+static void damos_test_help_initailize_scheme(struct damos *scheme)
+{
+ INIT_LIST_HEAD(&scheme->quota.goals);
+ INIT_LIST_HEAD(&scheme->core_filters);
+ INIT_LIST_HEAD(&scheme->ops_filters);
+}
+
+static void damos_test_commit_for(struct kunit *test, struct damos *dst,
+ struct damos *src)
+{
+ int err;
+
+ damos_test_help_initailize_scheme(dst);
+ damos_test_help_initailize_scheme(src);
+
+ err = damos_commit(dst, src);
+ if (err)
+ kunit_skip(test, "damos_commit fail");
+
+ KUNIT_EXPECT_EQ(test, dst->pattern.min_sz_region,
+ src->pattern.min_sz_region);
+ KUNIT_EXPECT_EQ(test, dst->pattern.max_sz_region,
+ src->pattern.max_sz_region);
+ KUNIT_EXPECT_EQ(test, dst->pattern.min_nr_accesses,
+ src->pattern.min_nr_accesses);
+ KUNIT_EXPECT_EQ(test, dst->pattern.max_nr_accesses,
+ src->pattern.max_nr_accesses);
+ KUNIT_EXPECT_EQ(test, dst->pattern.min_age_region,
+ src->pattern.min_age_region);
+ KUNIT_EXPECT_EQ(test, dst->pattern.max_age_region,
+ src->pattern.max_age_region);
+
+ KUNIT_EXPECT_EQ(test, dst->action, src->action);
+ KUNIT_EXPECT_EQ(test, dst->apply_interval_us, src->apply_interval_us);
+
+ KUNIT_EXPECT_EQ(test, dst->wmarks.metric, src->wmarks.metric);
+ KUNIT_EXPECT_EQ(test, dst->wmarks.interval, src->wmarks.interval);
+ KUNIT_EXPECT_EQ(test, dst->wmarks.high, src->wmarks.high);
+ KUNIT_EXPECT_EQ(test, dst->wmarks.mid, src->wmarks.mid);
+ KUNIT_EXPECT_EQ(test, dst->wmarks.low, src->wmarks.low);
+
+ switch (src->action) {
+ case DAMOS_MIGRATE_COLD:
+ case DAMOS_MIGRATE_HOT:
+ KUNIT_EXPECT_EQ(test, dst->target_nid, src->target_nid);
+ break;
+ default:
+ break;
+ }
+}
+
+static void damos_test_commit(struct kunit *test)
+{
+ damos_test_commit_for(test,
+ &(struct damos){
+ .pattern = (struct damos_access_pattern){
+ 1, 2, 3, 4, 5, 6},
+ .action = DAMOS_PAGEOUT,
+ .apply_interval_us = 1000000,
+ .wmarks = (struct damos_watermarks){
+ DAMOS_WMARK_FREE_MEM_RATE,
+ 900, 100, 50},
+ },
+ &(struct damos){
+ .pattern = (struct damos_access_pattern){
+ 2, 3, 4, 5, 6, 7},
+ .action = DAMOS_PAGEOUT,
+ .apply_interval_us = 2000000,
+ .wmarks = (struct damos_watermarks){
+ DAMOS_WMARK_FREE_MEM_RATE,
+ 800, 50, 30},
+ });
+ damos_test_commit_for(test,
+ &(struct damos){
+ .pattern = (struct damos_access_pattern){
+ 1, 2, 3, 4, 5, 6},
+ .action = DAMOS_PAGEOUT,
+ .apply_interval_us = 1000000,
+ .wmarks = (struct damos_watermarks){
+ DAMOS_WMARK_FREE_MEM_RATE,
+ 900, 100, 50},
+ },
+ &(struct damos){
+ .pattern = (struct damos_access_pattern){
+ 2, 3, 4, 5, 6, 7},
+ .action = DAMOS_MIGRATE_HOT,
+ .apply_interval_us = 2000000,
+ .target_nid = 5,
+ });
+}
+
+static struct damon_target *damon_test_help_setup_target(
+ unsigned long region_start_end[][2], int nr_regions)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ int i;
+
+ t = damon_new_target();
+ if (!t)
+ return NULL;
+ for (i = 0; i < nr_regions; i++) {
+ r = damon_new_region(region_start_end[i][0],
+ region_start_end[i][1]);
+ if (!r) {
+ damon_free_target(t);
+ return NULL;
+ }
+ damon_add_region(r, t);
+ }
+ return t;
+}
+
+static void damon_test_commit_target_regions_for(struct kunit *test,
+ unsigned long dst_start_end[][2], int nr_dst_regions,
+ unsigned long src_start_end[][2], int nr_src_regions,
+ unsigned long expect_start_end[][2], int nr_expect_regions)
+{
+ struct damon_target *dst_target, *src_target;
+ struct damon_region *r;
+ int i;
+
+ dst_target = damon_test_help_setup_target(dst_start_end, nr_dst_regions);
+ if (!dst_target)
+ kunit_skip(test, "dst target setup fail");
+ src_target = damon_test_help_setup_target(src_start_end, nr_src_regions);
+ if (!src_target) {
+ damon_free_target(dst_target);
+ kunit_skip(test, "src target setup fail");
+ }
+ damon_commit_target_regions(dst_target, src_target, 1);
+ i = 0;
+ damon_for_each_region(r, dst_target) {
+ KUNIT_EXPECT_EQ(test, r->ar.start, expect_start_end[i][0]);
+ KUNIT_EXPECT_EQ(test, r->ar.end, expect_start_end[i][1]);
+ i++;
+ }
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(dst_target), nr_expect_regions);
+ KUNIT_EXPECT_EQ(test, i, nr_expect_regions);
+ damon_free_target(dst_target);
+ damon_free_target(src_target);
+}
+
+static void damon_test_commit_target_regions(struct kunit *test)
+{
+ damon_test_commit_target_regions_for(test,
+ (unsigned long[][2]) {{3, 8}, {8, 10}}, 2,
+ (unsigned long[][2]) {{4, 6}}, 1,
+ (unsigned long[][2]) {{4, 6}}, 1);
+ damon_test_commit_target_regions_for(test,
+ (unsigned long[][2]) {{3, 8}, {8, 10}}, 2,
+ (unsigned long[][2]) {}, 0,
+ (unsigned long[][2]) {{3, 8}, {8, 10}}, 2);
+}
+
+static void damos_test_filter_out(struct kunit *test)
+{
+ struct damon_target *t;
+ struct damon_region *r, *r2;
+ struct damos_filter *f;
+
+ f = damos_new_filter(DAMOS_FILTER_TYPE_ADDR, true, false);
+ if (!f)
+ kunit_skip(test, "filter alloc fail");
+ f->addr_range = (struct damon_addr_range){.start = 2, .end = 6};
+
+ t = damon_new_target();
+ if (!t) {
+ damos_destroy_filter(f);
+ kunit_skip(test, "target alloc fail");
+ }
+ r = damon_new_region(3, 5);
+ if (!r) {
+ damos_destroy_filter(f);
+ damon_free_target(t);
+ kunit_skip(test, "region alloc fail");
+ }
+ damon_add_region(r, t);
+
+ /* region in the range */
+ KUNIT_EXPECT_TRUE(test, damos_filter_match(NULL, t, r, f, 1));
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1);
+
+ /* region before the range */
+ r->ar.start = 1;
+ r->ar.end = 2;
+ KUNIT_EXPECT_FALSE(test,
+ damos_filter_match(NULL, t, r, f, 1));
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1);
+
+ /* region after the range */
+ r->ar.start = 6;
+ r->ar.end = 8;
+ KUNIT_EXPECT_FALSE(test,
+ damos_filter_match(NULL, t, r, f, 1));
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1);
+
+ /* region started before the range */
+ r->ar.start = 1;
+ r->ar.end = 4;
+ KUNIT_EXPECT_FALSE(test, damos_filter_match(NULL, t, r, f, 1));
+ /* filter should have split the region */
+ KUNIT_EXPECT_EQ(test, r->ar.start, 1);
+ KUNIT_EXPECT_EQ(test, r->ar.end, 2);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2);
+ r2 = damon_next_region(r);
+ KUNIT_EXPECT_EQ(test, r2->ar.start, 2);
+ KUNIT_EXPECT_EQ(test, r2->ar.end, 4);
+ damon_destroy_region(r2, t);
+
+ /* region started in the range */
+ r->ar.start = 2;
+ r->ar.end = 8;
+ KUNIT_EXPECT_TRUE(test,
+ damos_filter_match(NULL, t, r, f, 1));
+ /* filter should have split the region */
+ KUNIT_EXPECT_EQ(test, r->ar.start, 2);
+ KUNIT_EXPECT_EQ(test, r->ar.end, 6);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2);
+ r2 = damon_next_region(r);
+ KUNIT_EXPECT_EQ(test, r2->ar.start, 6);
+ KUNIT_EXPECT_EQ(test, r2->ar.end, 8);
+ damon_destroy_region(r2, t);
+
+ damon_free_target(t);
+ damos_free_filter(f);
+}
+
+static void damon_test_feed_loop_next_input(struct kunit *test)
+{
+ unsigned long last_input = 900000, current_score = 200;
+
+ /*
+ * If current score is lower than the goal, which is always 10,000
+ * (read the comment on damon_feed_loop_next_input()'s comment), next
+ * input should be higher than the last input.
+ */
+ KUNIT_EXPECT_GT(test,
+ damon_feed_loop_next_input(last_input, current_score),
+ last_input);
+
+ /*
+ * If current score is higher than the goal, next input should be lower
+ * than the last input.
+ */
+ current_score = 250000000;
+ KUNIT_EXPECT_LT(test,
+ damon_feed_loop_next_input(last_input, current_score),
+ last_input);
+
+ /*
+ * The next input depends on the distance between the current score and
+ * the goal
+ */
+ KUNIT_EXPECT_GT(test,
+ damon_feed_loop_next_input(last_input, 200),
+ damon_feed_loop_next_input(last_input, 2000));
+}
+
+static void damon_test_set_filters_default_reject(struct kunit *test)
+{
+ struct damos scheme;
+ struct damos_filter *target_filter, *anon_filter;
+
+ INIT_LIST_HEAD(&scheme.core_filters);
+ INIT_LIST_HEAD(&scheme.ops_filters);
+
+ damos_set_filters_default_reject(&scheme);
+ /*
+ * No filter is installed. Allow by default on both core and ops layer
+ * filtering stages, since there are no filters at all.
+ */
+ KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, false);
+ KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, false);
+
+ target_filter = damos_new_filter(DAMOS_FILTER_TYPE_TARGET, true, true);
+ if (!target_filter)
+ kunit_skip(test, "filter alloc fail");
+ damos_add_filter(&scheme, target_filter);
+ damos_set_filters_default_reject(&scheme);
+ /*
+ * A core-handled allow-filter is installed.
+ * Rejct by default on core layer filtering stage due to the last
+ * core-layer-filter's behavior.
+ * Allow by default on ops layer filtering stage due to the absence of
+ * ops layer filters.
+ */
+ KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, true);
+ KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, false);
+
+ target_filter->allow = false;
+ damos_set_filters_default_reject(&scheme);
+ /*
+ * A core-handled reject-filter is installed.
+ * Allow by default on core layer filtering stage due to the last
+ * core-layer-filter's behavior.
+ * Allow by default on ops layer filtering stage due to the absence of
+ * ops layer filters.
+ */
+ KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, false);
+ KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, false);
+
+ anon_filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true, true);
+ if (!anon_filter) {
+ damos_free_filter(target_filter);
+ kunit_skip(test, "anon_filter alloc fail");
+ }
+ damos_add_filter(&scheme, anon_filter);
+
+ damos_set_filters_default_reject(&scheme);
+ /*
+ * A core-handled reject-filter and ops-handled allow-filter are installed.
+ * Allow by default on core layer filtering stage due to the existence
+ * of the ops-handled filter.
+ * Reject by default on ops layer filtering stage due to the last
+ * ops-layer-filter's behavior.
+ */
+ KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, false);
+ KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, true);
+
+ target_filter->allow = true;
+ damos_set_filters_default_reject(&scheme);
+ /*
+ * A core-handled allow-filter and ops-handled allow-filter are
+ * installed.
+ * Allow by default on core layer filtering stage due to the existence
+ * of the ops-handled filter.
+ * Reject by default on ops layer filtering stage due to the last
+ * ops-layer-filter's behavior.
+ */
+ KUNIT_EXPECT_EQ(test, scheme.core_filters_default_reject, false);
+ KUNIT_EXPECT_EQ(test, scheme.ops_filters_default_reject, true);
+
+ damos_free_filter(anon_filter);
+ damos_free_filter(target_filter);
+}
+
+static struct kunit_case damon_test_cases[] = {
+ KUNIT_CASE(damon_test_target),
+ KUNIT_CASE(damon_test_regions),
+ KUNIT_CASE(damon_test_aggregate),
+ KUNIT_CASE(damon_test_split_at),
+ KUNIT_CASE(damon_test_merge_two),
+ KUNIT_CASE(damon_test_merge_regions_of),
+ KUNIT_CASE(damon_test_split_regions_of),
+ KUNIT_CASE(damon_test_ops_registration),
+ KUNIT_CASE(damon_test_set_regions),
+ KUNIT_CASE(damon_test_nr_accesses_to_accesses_bp),
+ KUNIT_CASE(damon_test_update_monitoring_result),
+ KUNIT_CASE(damon_test_set_attrs),
+ KUNIT_CASE(damon_test_moving_sum),
+ KUNIT_CASE(damos_test_new_filter),
+ KUNIT_CASE(damos_test_commit_quota_goal),
+ KUNIT_CASE(damos_test_commit_quota_goals),
+ KUNIT_CASE(damos_test_commit_quota),
+ KUNIT_CASE(damos_test_commit_dests),
+ KUNIT_CASE(damos_test_commit_filter),
+ KUNIT_CASE(damos_test_commit),
+ KUNIT_CASE(damon_test_commit_target_regions),
+ KUNIT_CASE(damos_test_filter_out),
+ KUNIT_CASE(damon_test_feed_loop_next_input),
+ KUNIT_CASE(damon_test_set_filters_default_reject),
+ {},
+};
+
+static struct kunit_suite damon_test_suite = {
+ .name = "damon",
+ .test_cases = damon_test_cases,
+};
+kunit_test_suite(damon_test_suite);
+
+#endif /* _DAMON_CORE_TEST_H */
+
+#endif /* CONFIG_DAMON_KUNIT_TEST */
diff --git a/mm/damon/tests/sysfs-kunit.h b/mm/damon/tests/sysfs-kunit.h
new file mode 100644
index 000000000000..0c665ed255a3
--- /dev/null
+++ b/mm/damon/tests/sysfs-kunit.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#ifdef CONFIG_DAMON_SYSFS_KUNIT_TEST
+
+#ifndef _DAMON_SYSFS_TEST_H
+#define _DAMON_SYSFS_TEST_H
+
+#include <kunit/test.h>
+
+static unsigned int nr_damon_targets(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ unsigned int nr_targets = 0;
+
+ damon_for_each_target(t, ctx)
+ nr_targets++;
+
+ return nr_targets;
+}
+
+static int __damon_sysfs_test_get_any_pid(int min, int max)
+{
+ struct pid *pid;
+ int i;
+
+ for (i = min; i <= max; i++) {
+ pid = find_get_pid(i);
+ if (pid) {
+ put_pid(pid);
+ return i;
+ }
+ }
+ return -1;
+}
+
+static void damon_sysfs_test_add_targets(struct kunit *test)
+{
+ struct damon_sysfs_targets *sysfs_targets;
+ struct damon_sysfs_target *sysfs_target;
+ struct damon_ctx *ctx;
+
+ sysfs_targets = damon_sysfs_targets_alloc();
+ if (!sysfs_targets)
+ kunit_skip(test, "sysfs_targets alloc fail");
+ sysfs_targets->nr = 1;
+ sysfs_targets->targets_arr = kmalloc_array(1,
+ sizeof(*sysfs_targets->targets_arr), GFP_KERNEL);
+ if (!sysfs_targets->targets_arr) {
+ kfree(sysfs_targets);
+ kunit_skip(test, "targets_arr alloc fail");
+ }
+
+ sysfs_target = damon_sysfs_target_alloc();
+ if (!sysfs_target) {
+ kfree(sysfs_targets->targets_arr);
+ kfree(sysfs_targets);
+ kunit_skip(test, "sysfs_target alloc fail");
+ }
+ sysfs_target->pid = __damon_sysfs_test_get_any_pid(12, 100);
+ sysfs_target->regions = damon_sysfs_regions_alloc();
+ if (!sysfs_target->regions) {
+ kfree(sysfs_targets->targets_arr);
+ kfree(sysfs_targets);
+ kfree(sysfs_target);
+ kunit_skip(test, "sysfs_regions alloc fail");
+ }
+
+ sysfs_targets->targets_arr[0] = sysfs_target;
+
+ ctx = damon_new_ctx();
+ if (!ctx) {
+ kfree(sysfs_targets->targets_arr);
+ kfree(sysfs_targets);
+ kfree(sysfs_target->regions);
+ kfree(sysfs_target);
+ kunit_skip(test, "ctx alloc fail");
+ }
+
+ damon_sysfs_add_targets(ctx, sysfs_targets);
+ KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(ctx));
+
+ sysfs_target->pid = __damon_sysfs_test_get_any_pid(
+ sysfs_target->pid + 1, 200);
+ damon_sysfs_add_targets(ctx, sysfs_targets);
+ KUNIT_EXPECT_EQ(test, 2u, nr_damon_targets(ctx));
+
+ damon_destroy_ctx(ctx);
+ kfree(sysfs_targets->targets_arr);
+ kfree(sysfs_targets);
+ kfree(sysfs_target->regions);
+ kfree(sysfs_target);
+}
+
+static struct kunit_case damon_sysfs_test_cases[] = {
+ KUNIT_CASE(damon_sysfs_test_add_targets),
+ {},
+};
+
+static struct kunit_suite damon_sysfs_test_suite = {
+ .name = "damon-sysfs",
+ .test_cases = damon_sysfs_test_cases,
+};
+kunit_test_suite(damon_sysfs_test_suite);
+
+#endif /* _DAMON_SYSFS_TEST_H */
+
+#endif /* CONFIG_DAMON_SYSFS_KUNIT_TEST */
diff --git a/mm/damon/tests/vaddr-kunit.h b/mm/damon/tests/vaddr-kunit.h
new file mode 100644
index 000000000000..30dc5459f1d2
--- /dev/null
+++ b/mm/damon/tests/vaddr-kunit.h
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
+
+#ifndef _DAMON_VADDR_TEST_H
+#define _DAMON_VADDR_TEST_H
+
+#include <kunit/test.h>
+
+static int __link_vmas(struct maple_tree *mt, struct vm_area_struct *vmas,
+ ssize_t nr_vmas)
+{
+ int i, ret = -ENOMEM;
+ MA_STATE(mas, mt, 0, 0);
+
+ if (!nr_vmas)
+ return 0;
+
+ mas_lock(&mas);
+ for (i = 0; i < nr_vmas; i++) {
+ mas_set_range(&mas, vmas[i].vm_start, vmas[i].vm_end - 1);
+ if (mas_store_gfp(&mas, &vmas[i], GFP_KERNEL))
+ goto failed;
+ }
+
+ ret = 0;
+failed:
+ mas_unlock(&mas);
+ return ret;
+}
+
+/*
+ * Test __damon_va_three_regions() function
+ *
+ * In case of virtual memory address spaces monitoring, DAMON converts the
+ * complex and dynamic memory mappings of each target task to three
+ * discontiguous regions which cover every mapped areas. However, the three
+ * regions should not include the two biggest unmapped areas in the original
+ * mapping, because the two biggest areas are normally the areas between 1)
+ * heap and the mmap()-ed regions, and 2) the mmap()-ed regions and stack.
+ * Because these two unmapped areas are very huge but obviously never accessed,
+ * covering the region is just a waste.
+ *
+ * '__damon_va_three_regions() receives an address space of a process. It
+ * first identifies the start of mappings, end of mappings, and the two biggest
+ * unmapped areas. After that, based on the information, it constructs the
+ * three regions and returns. For more detail, refer to the comment of
+ * 'damon_init_regions_of()' function definition in 'mm/damon.c' file.
+ *
+ * For example, suppose virtual address ranges of 10-20, 20-25, 200-210,
+ * 210-220, 300-305, and 307-330 (Other comments represent this mappings in
+ * more short form: 10-20-25, 200-210-220, 300-305, 307-330) of a process are
+ * mapped. To cover every mappings, the three regions should start with 10,
+ * and end with 305. The process also has three unmapped areas, 25-200,
+ * 220-300, and 305-307. Among those, 25-200 and 220-300 are the biggest two
+ * unmapped areas, and thus it should be converted to three regions of 10-25,
+ * 200-220, and 300-330.
+ */
+static void damon_test_three_regions_in_vmas(struct kunit *test)
+{
+ static struct mm_struct mm;
+ struct damon_addr_range regions[3] = {0};
+ /* 10-20-25, 200-210-220, 300-305, 307-330 */
+ static struct vm_area_struct vmas[] = {
+ (struct vm_area_struct) {.vm_start = 10, .vm_end = 20},
+ (struct vm_area_struct) {.vm_start = 20, .vm_end = 25},
+ (struct vm_area_struct) {.vm_start = 200, .vm_end = 210},
+ (struct vm_area_struct) {.vm_start = 210, .vm_end = 220},
+ (struct vm_area_struct) {.vm_start = 300, .vm_end = 305},
+ (struct vm_area_struct) {.vm_start = 307, .vm_end = 330},
+ };
+
+ mt_init_flags(&mm.mm_mt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_USE_RCU);
+ if (__link_vmas(&mm.mm_mt, vmas, ARRAY_SIZE(vmas)))
+ kunit_skip(test, "Failed to create VMA tree");
+
+ __damon_va_three_regions(&mm, regions);
+
+ KUNIT_EXPECT_EQ(test, 10ul, regions[0].start);
+ KUNIT_EXPECT_EQ(test, 25ul, regions[0].end);
+ KUNIT_EXPECT_EQ(test, 200ul, regions[1].start);
+ KUNIT_EXPECT_EQ(test, 220ul, regions[1].end);
+ KUNIT_EXPECT_EQ(test, 300ul, regions[2].start);
+ KUNIT_EXPECT_EQ(test, 330ul, regions[2].end);
+}
+
+static struct damon_region *__nth_region_of(struct damon_target *t, int idx)
+{
+ struct damon_region *r;
+ unsigned int i = 0;
+
+ damon_for_each_region(r, t) {
+ if (i++ == idx)
+ return r;
+ }
+
+ return NULL;
+}
+
+/*
+ * Test 'damon_set_regions()'
+ *
+ * test kunit object
+ * regions an array containing start/end addresses of current
+ * monitoring target regions
+ * nr_regions the number of the addresses in 'regions'
+ * three_regions The three regions that need to be applied now
+ * expected start/end addresses of monitoring target regions that
+ * 'three_regions' are applied
+ * nr_expected the number of addresses in 'expected'
+ *
+ * The memory mapping of the target processes changes dynamically. To follow
+ * the change, DAMON periodically reads the mappings, simplifies it to the
+ * three regions, and updates the monitoring target regions to fit in the three
+ * regions. The update of current target regions is the role of
+ * 'damon_set_regions()'.
+ *
+ * This test passes the given target regions and the new three regions that
+ * need to be applied to the function and check whether it updates the regions
+ * as expected.
+ */
+static void damon_do_test_apply_three_regions(struct kunit *test,
+ unsigned long *regions, int nr_regions,
+ struct damon_addr_range *three_regions,
+ unsigned long *expected, int nr_expected)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ int i;
+
+ t = damon_new_target();
+ if (!t)
+ kunit_skip(test, "target alloc fail");
+ for (i = 0; i < nr_regions / 2; i++) {
+ r = damon_new_region(regions[i * 2], regions[i * 2 + 1]);
+ if (!r) {
+ damon_destroy_target(t, NULL);
+ kunit_skip(test, "region alloc fail");
+ }
+ damon_add_region(r, t);
+ }
+
+ damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION);
+
+ for (i = 0; i < nr_expected / 2; i++) {
+ r = __nth_region_of(t, i);
+ KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]);
+ KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]);
+ }
+
+ damon_destroy_target(t, NULL);
+}
+
+/*
+ * This function test most common case where the three big regions are only
+ * slightly changed. Target regions should adjust their boundary (10-20-30,
+ * 50-55, 70-80, 90-100) to fit with the new big regions or remove target
+ * regions (57-79) that now out of the three regions.
+ */
+static void damon_test_apply_three_regions1(struct kunit *test)
+{
+ /* 10-20-30, 50-55-57-59, 70-80-90-100 */
+ unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+ 70, 80, 80, 90, 90, 100};
+ /* 5-27, 45-55, 73-104 */
+ struct damon_addr_range new_three_regions[3] = {
+ (struct damon_addr_range){.start = 5, .end = 27},
+ (struct damon_addr_range){.start = 45, .end = 55},
+ (struct damon_addr_range){.start = 73, .end = 104} };
+ /* 5-20-27, 45-55, 73-80-90-104 */
+ unsigned long expected[] = {5, 20, 20, 27, 45, 55,
+ 73, 80, 80, 90, 90, 104};
+
+ damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+ new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test slightly bigger change. Similar to above, but the second big region
+ * now require two target regions (50-55, 57-59) to be removed.
+ */
+static void damon_test_apply_three_regions2(struct kunit *test)
+{
+ /* 10-20-30, 50-55-57-59, 70-80-90-100 */
+ unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+ 70, 80, 80, 90, 90, 100};
+ /* 5-27, 56-57, 65-104 */
+ struct damon_addr_range new_three_regions[3] = {
+ (struct damon_addr_range){.start = 5, .end = 27},
+ (struct damon_addr_range){.start = 56, .end = 57},
+ (struct damon_addr_range){.start = 65, .end = 104} };
+ /* 5-20-27, 56-57, 65-80-90-104 */
+ unsigned long expected[] = {5, 20, 20, 27, 56, 57,
+ 65, 80, 80, 90, 90, 104};
+
+ damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+ new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test a big change. The second big region has totally freed and mapped to
+ * different area (50-59 -> 61-63). The target regions which were in the old
+ * second big region (50-55-57-59) should be removed and new target region
+ * covering the second big region (61-63) should be created.
+ */
+static void damon_test_apply_three_regions3(struct kunit *test)
+{
+ /* 10-20-30, 50-55-57-59, 70-80-90-100 */
+ unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+ 70, 80, 80, 90, 90, 100};
+ /* 5-27, 61-63, 65-104 */
+ struct damon_addr_range new_three_regions[3] = {
+ (struct damon_addr_range){.start = 5, .end = 27},
+ (struct damon_addr_range){.start = 61, .end = 63},
+ (struct damon_addr_range){.start = 65, .end = 104} };
+ /* 5-20-27, 61-63, 65-80-90-104 */
+ unsigned long expected[] = {5, 20, 20, 27, 61, 63,
+ 65, 80, 80, 90, 90, 104};
+
+ damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+ new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test another big change. Both of the second and third big regions (50-59
+ * and 70-100) has totally freed and mapped to different area (30-32 and
+ * 65-68). The target regions which were in the old second and third big
+ * regions should now be removed and new target regions covering the new second
+ * and third big regions should be created.
+ */
+static void damon_test_apply_three_regions4(struct kunit *test)
+{
+ /* 10-20-30, 50-55-57-59, 70-80-90-100 */
+ unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+ 70, 80, 80, 90, 90, 100};
+ /* 5-7, 30-32, 65-68 */
+ struct damon_addr_range new_three_regions[3] = {
+ (struct damon_addr_range){.start = 5, .end = 7},
+ (struct damon_addr_range){.start = 30, .end = 32},
+ (struct damon_addr_range){.start = 65, .end = 68} };
+ /* expect 5-7, 30-32, 65-68 */
+ unsigned long expected[] = {5, 7, 30, 32, 65, 68};
+
+ damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+ new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+static void damon_test_split_evenly_fail(struct kunit *test,
+ unsigned long start, unsigned long end, unsigned int nr_pieces)
+{
+ struct damon_target *t = damon_new_target();
+ struct damon_region *r;
+
+ if (!t)
+ kunit_skip(test, "target alloc fail");
+
+ r = damon_new_region(start, end);
+ if (!r) {
+ damon_free_target(t);
+ kunit_skip(test, "region alloc fail");
+ }
+
+ damon_add_region(r, t);
+ KUNIT_EXPECT_EQ(test,
+ damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u);
+
+ damon_for_each_region(r, t) {
+ KUNIT_EXPECT_EQ(test, r->ar.start, start);
+ KUNIT_EXPECT_EQ(test, r->ar.end, end);
+ }
+
+ damon_free_target(t);
+}
+
+static void damon_test_split_evenly_succ(struct kunit *test,
+ unsigned long start, unsigned long end, unsigned int nr_pieces)
+{
+ struct damon_target *t = damon_new_target();
+ struct damon_region *r;
+ unsigned long expected_width = (end - start) / nr_pieces;
+ unsigned long i = 0;
+
+ if (!t)
+ kunit_skip(test, "target alloc fail");
+ r = damon_new_region(start, end);
+ if (!r) {
+ damon_free_target(t);
+ kunit_skip(test, "region alloc fail");
+ }
+ damon_add_region(r, t);
+ KUNIT_EXPECT_EQ(test,
+ damon_va_evenly_split_region(t, r, nr_pieces), 0);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces);
+
+ damon_for_each_region(r, t) {
+ if (i == nr_pieces - 1) {
+ KUNIT_EXPECT_EQ(test,
+ r->ar.start, start + i * expected_width);
+ KUNIT_EXPECT_EQ(test, r->ar.end, end);
+ break;
+ }
+ KUNIT_EXPECT_EQ(test,
+ r->ar.start, start + i++ * expected_width);
+ KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width);
+ }
+ damon_free_target(t);
+}
+
+static void damon_test_split_evenly(struct kunit *test)
+{
+ KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5),
+ -EINVAL);
+
+ damon_test_split_evenly_fail(test, 0, 100, 0);
+ damon_test_split_evenly_succ(test, 0, 100, 10);
+ damon_test_split_evenly_succ(test, 5, 59, 5);
+ damon_test_split_evenly_succ(test, 4, 6, 1);
+ damon_test_split_evenly_succ(test, 0, 3, 2);
+ damon_test_split_evenly_fail(test, 5, 6, 2);
+}
+
+static struct kunit_case damon_test_cases[] = {
+ KUNIT_CASE(damon_test_three_regions_in_vmas),
+ KUNIT_CASE(damon_test_apply_three_regions1),
+ KUNIT_CASE(damon_test_apply_three_regions2),
+ KUNIT_CASE(damon_test_apply_three_regions3),
+ KUNIT_CASE(damon_test_apply_three_regions4),
+ KUNIT_CASE(damon_test_split_evenly),
+ {},
+};
+
+static struct kunit_suite damon_test_suite = {
+ .name = "damon-operations",
+ .test_cases = damon_test_cases,
+};
+kunit_test_suite(damon_test_suite);
+
+#endif /* _DAMON_VADDR_TEST_H */
+
+#endif /* CONFIG_DAMON_VADDR_KUNIT_TEST */
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
new file mode 100644
index 000000000000..2750c88e7225
--- /dev/null
+++ b/mm/damon/vaddr.c
@@ -0,0 +1,1038 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON Code for Virtual Address Spaces
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-va: " fmt
+
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/mman.h>
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
+#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
+
+#include "../internal.h"
+#include "ops-common.h"
+
+#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
+#undef DAMON_MIN_REGION
+#define DAMON_MIN_REGION 1
+#endif
+
+/*
+ * 't->pid' should be the pointer to the relevant 'struct pid' having reference
+ * count. Caller must put the returned task, unless it is NULL.
+ */
+static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
+{
+ return get_pid_task(t->pid, PIDTYPE_PID);
+}
+
+/*
+ * Get the mm_struct of the given target
+ *
+ * Caller _must_ put the mm_struct after use, unless it is NULL.
+ *
+ * Returns the mm_struct of the target on success, NULL on failure
+ */
+static struct mm_struct *damon_get_mm(struct damon_target *t)
+{
+ struct task_struct *task;
+ struct mm_struct *mm;
+
+ task = damon_get_task_struct(t);
+ if (!task)
+ return NULL;
+
+ mm = get_task_mm(task);
+ put_task_struct(task);
+ return mm;
+}
+
+/*
+ * Functions for the initial monitoring target regions construction
+ */
+
+/*
+ * Size-evenly split a region into 'nr_pieces' small regions
+ *
+ * Returns 0 on success, or negative error code otherwise.
+ */
+static int damon_va_evenly_split_region(struct damon_target *t,
+ struct damon_region *r, unsigned int nr_pieces)
+{
+ unsigned long sz_orig, sz_piece, orig_end;
+ struct damon_region *n = NULL, *next;
+ unsigned long start;
+ unsigned int i;
+
+ if (!r || !nr_pieces)
+ return -EINVAL;
+
+ if (nr_pieces == 1)
+ return 0;
+
+ orig_end = r->ar.end;
+ sz_orig = damon_sz_region(r);
+ sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);
+
+ if (!sz_piece)
+ return -EINVAL;
+
+ r->ar.end = r->ar.start + sz_piece;
+ next = damon_next_region(r);
+ for (start = r->ar.end, i = 1; i < nr_pieces; start += sz_piece, i++) {
+ n = damon_new_region(start, start + sz_piece);
+ if (!n)
+ return -ENOMEM;
+ damon_insert_region(n, r, next, t);
+ r = n;
+ }
+ /* complement last region for possible rounding error */
+ if (n)
+ n->ar.end = orig_end;
+
+ return 0;
+}
+
+static unsigned long sz_range(struct damon_addr_range *r)
+{
+ return r->end - r->start;
+}
+
+/*
+ * Find three regions separated by two biggest unmapped regions
+ *
+ * vma the head vma of the target address space
+ * regions an array of three address ranges that results will be saved
+ *
+ * This function receives an address space and finds three regions in it which
+ * separated by the two biggest unmapped regions in the space. Please refer to
+ * below comments of '__damon_va_init_regions()' function to know why this is
+ * necessary.
+ *
+ * Returns 0 if success, or negative error code otherwise.
+ */
+static int __damon_va_three_regions(struct mm_struct *mm,
+ struct damon_addr_range regions[3])
+{
+ struct damon_addr_range first_gap = {0}, second_gap = {0};
+ VMA_ITERATOR(vmi, mm, 0);
+ struct vm_area_struct *vma, *prev = NULL;
+ unsigned long start;
+
+ /*
+ * Find the two biggest gaps so that first_gap > second_gap > others.
+ * If this is too slow, it can be optimised to examine the maple
+ * tree gaps.
+ */
+ rcu_read_lock();
+ for_each_vma(vmi, vma) {
+ unsigned long gap;
+
+ if (!prev) {
+ start = vma->vm_start;
+ goto next;
+ }
+ gap = vma->vm_start - prev->vm_end;
+
+ if (gap > sz_range(&first_gap)) {
+ second_gap = first_gap;
+ first_gap.start = prev->vm_end;
+ first_gap.end = vma->vm_start;
+ } else if (gap > sz_range(&second_gap)) {
+ second_gap.start = prev->vm_end;
+ second_gap.end = vma->vm_start;
+ }
+next:
+ prev = vma;
+ }
+ rcu_read_unlock();
+
+ if (!sz_range(&second_gap) || !sz_range(&first_gap))
+ return -EINVAL;
+
+ /* Sort the two biggest gaps by address */
+ if (first_gap.start > second_gap.start)
+ swap(first_gap, second_gap);
+
+ /* Store the result */
+ regions[0].start = ALIGN(start, DAMON_MIN_REGION);
+ regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
+ regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
+ regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
+ regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
+ regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);
+
+ return 0;
+}
+
+/*
+ * Get the three regions in the given target (task)
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int damon_va_three_regions(struct damon_target *t,
+ struct damon_addr_range regions[3])
+{
+ struct mm_struct *mm;
+ int rc;
+
+ mm = damon_get_mm(t);
+ if (!mm)
+ return -EINVAL;
+
+ mmap_read_lock(mm);
+ rc = __damon_va_three_regions(mm, regions);
+ mmap_read_unlock(mm);
+
+ mmput(mm);
+ return rc;
+}
+
+/*
+ * Initialize the monitoring target regions for the given target (task)
+ *
+ * t the given target
+ *
+ * Because only a number of small portions of the entire address space
+ * is actually mapped to the memory and accessed, monitoring the unmapped
+ * regions is wasteful. That said, because we can deal with small noises,
+ * tracking every mapping is not strictly required but could even incur a high
+ * overhead if the mapping frequently changes or the number of mappings is
+ * high. The adaptive regions adjustment mechanism will further help to deal
+ * with the noise by simply identifying the unmapped areas as a region that
+ * has no access. Moreover, applying the real mappings that would have many
+ * unmapped areas inside will make the adaptive mechanism quite complex. That
+ * said, too huge unmapped areas inside the monitoring target should be removed
+ * to not take the time for the adaptive mechanism.
+ *
+ * For the reason, we convert the complex mappings to three distinct regions
+ * that cover every mapped area of the address space. Also the two gaps
+ * between the three regions are the two biggest unmapped areas in the given
+ * address space. In detail, this function first identifies the start and the
+ * end of the mappings and the two biggest unmapped areas of the address space.
+ * Then, it constructs the three regions as below:
+ *
+ * [mappings[0]->start, big_two_unmapped_areas[0]->start)
+ * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
+ * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
+ *
+ * As usual memory map of processes is as below, the gap between the heap and
+ * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed
+ * region and the stack will be two biggest unmapped regions. Because these
+ * gaps are exceptionally huge areas in usual address space, excluding these
+ * two biggest unmapped regions will be sufficient to make a trade-off.
+ *
+ * <heap>
+ * <BIG UNMAPPED REGION 1>
+ * <uppermost mmap()-ed region>
+ * (other mmap()-ed regions and small unmapped regions)
+ * <lowermost mmap()-ed region>
+ * <BIG UNMAPPED REGION 2>
+ * <stack>
+ */
+static void __damon_va_init_regions(struct damon_ctx *ctx,
+ struct damon_target *t)
+{
+ struct damon_target *ti;
+ struct damon_region *r;
+ struct damon_addr_range regions[3];
+ unsigned long sz = 0, nr_pieces;
+ int i, tidx = 0;
+
+ if (damon_va_three_regions(t, regions)) {
+ damon_for_each_target(ti, ctx) {
+ if (ti == t)
+ break;
+ tidx++;
+ }
+ pr_debug("Failed to get three regions of %dth target\n", tidx);
+ return;
+ }
+
+ for (i = 0; i < 3; i++)
+ sz += regions[i].end - regions[i].start;
+ if (ctx->attrs.min_nr_regions)
+ sz /= ctx->attrs.min_nr_regions;
+ if (sz < DAMON_MIN_REGION)
+ sz = DAMON_MIN_REGION;
+
+ /* Set the initial three regions of the target */
+ for (i = 0; i < 3; i++) {
+ r = damon_new_region(regions[i].start, regions[i].end);
+ if (!r) {
+ pr_err("%d'th init region creation failed\n", i);
+ return;
+ }
+ damon_add_region(r, t);
+
+ nr_pieces = (regions[i].end - regions[i].start) / sz;
+ damon_va_evenly_split_region(t, r, nr_pieces);
+ }
+}
+
+/* Initialize '->regions_list' of every target (task) */
+static void damon_va_init(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+
+ damon_for_each_target(t, ctx) {
+ /* the user may set the target regions as they want */
+ if (!damon_nr_regions(t))
+ __damon_va_init_regions(ctx, t);
+ }
+}
+
+/*
+ * Update regions for current memory mappings
+ */
+static void damon_va_update(struct damon_ctx *ctx)
+{
+ struct damon_addr_range three_regions[3];
+ struct damon_target *t;
+
+ damon_for_each_target(t, ctx) {
+ if (damon_va_three_regions(t, three_regions))
+ continue;
+ damon_set_regions(t, three_regions, 3, DAMON_MIN_REGION);
+ }
+}
+
+static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ ptl = pmd_trans_huge_lock(pmd, walk->vma);
+ if (ptl) {
+ pmd_t pmde = pmdp_get(pmd);
+
+ if (pmd_present(pmde))
+ damon_pmdp_mkold(pmd, walk->vma, addr);
+ spin_unlock(ptl);
+ return 0;
+ }
+
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte)
+ return 0;
+ if (!pte_present(ptep_get(pte)))
+ goto out;
+ damon_ptep_mkold(pte, walk->vma, addr);
+out:
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ bool referenced = false;
+ pte_t entry = huge_ptep_get(mm, addr, pte);
+ struct folio *folio = pfn_folio(pte_pfn(entry));
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
+ folio_get(folio);
+
+ if (pte_young(entry)) {
+ referenced = true;
+ entry = pte_mkold(entry);
+ set_huge_pte_at(mm, addr, pte, entry, psize);
+ }
+
+ if (mmu_notifier_clear_young(mm, addr,
+ addr + huge_page_size(hstate_vma(vma))))
+ referenced = true;
+
+ if (referenced)
+ folio_set_young(folio);
+
+ folio_set_idle(folio);
+ folio_put(folio);
+}
+
+static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct hstate *h = hstate_vma(walk->vma);
+ spinlock_t *ptl;
+ pte_t entry;
+
+ ptl = huge_pte_lock(h, walk->mm, pte);
+ entry = huge_ptep_get(walk->mm, addr, pte);
+ if (!pte_present(entry))
+ goto out;
+
+ damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
+
+out:
+ spin_unlock(ptl);
+ return 0;
+}
+#else
+#define damon_mkold_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static const struct mm_walk_ops damon_mkold_ops = {
+ .pmd_entry = damon_mkold_pmd_entry,
+ .hugetlb_entry = damon_mkold_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
+{
+ mmap_read_lock(mm);
+ walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
+ mmap_read_unlock(mm);
+}
+
+/*
+ * Functions for the access checking of the regions
+ */
+
+static void __damon_va_prepare_access_check(struct mm_struct *mm,
+ struct damon_region *r)
+{
+ r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
+
+ damon_va_mkold(mm, r->sampling_addr);
+}
+
+static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct mm_struct *mm;
+ struct damon_region *r;
+
+ damon_for_each_target(t, ctx) {
+ mm = damon_get_mm(t);
+ if (!mm)
+ continue;
+ damon_for_each_region(r, t)
+ __damon_va_prepare_access_check(mm, r);
+ mmput(mm);
+ }
+}
+
+struct damon_young_walk_private {
+ /* size of the folio for the access checked virtual memory address */
+ unsigned long *folio_sz;
+ bool young;
+};
+
+static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t *pte;
+ pte_t ptent;
+ spinlock_t *ptl;
+ struct folio *folio;
+ struct damon_young_walk_private *priv = walk->private;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ ptl = pmd_trans_huge_lock(pmd, walk->vma);
+ if (ptl) {
+ pmd_t pmde = pmdp_get(pmd);
+
+ if (!pmd_present(pmde))
+ goto huge_out;
+ folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
+ if (!folio)
+ goto huge_out;
+ if (pmd_young(pmde) || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(walk->mm,
+ addr))
+ priv->young = true;
+ *priv->folio_sz = HPAGE_PMD_SIZE;
+huge_out:
+ spin_unlock(ptl);
+ return 0;
+ }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte)
+ return 0;
+ ptent = ptep_get(pte);
+ if (!pte_present(ptent))
+ goto out;
+ folio = vm_normal_folio(walk->vma, addr, ptent);
+ if (!folio)
+ goto out;
+ if (pte_young(ptent) || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(walk->mm, addr))
+ priv->young = true;
+ *priv->folio_sz = folio_size(folio);
+out:
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct damon_young_walk_private *priv = walk->private;
+ struct hstate *h = hstate_vma(walk->vma);
+ struct folio *folio;
+ spinlock_t *ptl;
+ pte_t entry;
+
+ ptl = huge_pte_lock(h, walk->mm, pte);
+ entry = huge_ptep_get(walk->mm, addr, pte);
+ if (!pte_present(entry))
+ goto out;
+
+ folio = pfn_folio(pte_pfn(entry));
+ folio_get(folio);
+
+ if (pte_young(entry) || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(walk->mm, addr))
+ priv->young = true;
+ *priv->folio_sz = huge_page_size(h);
+
+ folio_put(folio);
+
+out:
+ spin_unlock(ptl);
+ return 0;
+}
+#else
+#define damon_young_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static const struct mm_walk_ops damon_young_ops = {
+ .pmd_entry = damon_young_pmd_entry,
+ .hugetlb_entry = damon_young_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
+ unsigned long *folio_sz)
+{
+ struct damon_young_walk_private arg = {
+ .folio_sz = folio_sz,
+ .young = false,
+ };
+
+ mmap_read_lock(mm);
+ walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
+ mmap_read_unlock(mm);
+ return arg.young;
+}
+
+/*
+ * Check whether the region was accessed after the last preparation
+ *
+ * mm 'mm_struct' for the given virtual address space
+ * r the region to be checked
+ */
+static void __damon_va_check_access(struct mm_struct *mm,
+ struct damon_region *r, bool same_target,
+ struct damon_attrs *attrs)
+{
+ static unsigned long last_addr;
+ static unsigned long last_folio_sz = PAGE_SIZE;
+ static bool last_accessed;
+
+ if (!mm) {
+ damon_update_region_access_rate(r, false, attrs);
+ return;
+ }
+
+ /* If the region is in the last checked page, reuse the result */
+ if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
+ ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
+ damon_update_region_access_rate(r, last_accessed, attrs);
+ return;
+ }
+
+ last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
+ damon_update_region_access_rate(r, last_accessed, attrs);
+
+ last_addr = r->sampling_addr;
+}
+
+static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct mm_struct *mm;
+ struct damon_region *r;
+ unsigned int max_nr_accesses = 0;
+ bool same_target;
+
+ damon_for_each_target(t, ctx) {
+ mm = damon_get_mm(t);
+ same_target = false;
+ damon_for_each_region(r, t) {
+ __damon_va_check_access(mm, r, same_target,
+ &ctx->attrs);
+ max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+ same_target = true;
+ }
+ if (mm)
+ mmput(mm);
+ }
+
+ return max_nr_accesses;
+}
+
+static bool damos_va_filter_young_match(struct damos_filter *filter,
+ struct folio *folio, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, pmd_t *pmdp)
+{
+ bool young = false;
+
+ if (ptep)
+ young = pte_young(ptep_get(ptep));
+ else if (pmdp)
+ young = pmd_young(pmdp_get(pmdp));
+
+ young = young || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(vma->vm_mm, addr);
+
+ if (young && ptep)
+ damon_ptep_mkold(ptep, vma, addr);
+ else if (young && pmdp)
+ damon_pmdp_mkold(pmdp, vma, addr);
+
+ return young == filter->matching;
+}
+
+static bool damos_va_filter_out(struct damos *scheme, struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pmd_t *pmdp)
+{
+ struct damos_filter *filter;
+ bool matched;
+
+ if (scheme->core_filters_allowed)
+ return false;
+
+ damos_for_each_ops_filter(filter, scheme) {
+ /*
+ * damos_folio_filter_match checks the young filter by doing an
+ * rmap on the folio to find its page table. However, being the
+ * vaddr scheme, we have direct access to the page tables, so
+ * use that instead.
+ */
+ if (filter->type == DAMOS_FILTER_TYPE_YOUNG)
+ matched = damos_va_filter_young_match(filter, folio,
+ vma, addr, ptep, pmdp);
+ else
+ matched = damos_folio_filter_match(filter, folio);
+
+ if (matched)
+ return !filter->allow;
+ }
+ return scheme->ops_filters_default_reject;
+}
+
+struct damos_va_migrate_private {
+ struct list_head *migration_lists;
+ struct damos *scheme;
+};
+
+/*
+ * Place the given folio in the migration_list corresponding to where the folio
+ * should be migrated.
+ *
+ * The algorithm used here is similar to weighted_interleave_nid()
+ */
+static void damos_va_migrate_dests_add(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr,
+ struct damos_migrate_dests *dests,
+ struct list_head *migration_lists)
+{
+ pgoff_t ilx;
+ int order;
+ unsigned int target;
+ unsigned int weight_total = 0;
+ int i;
+
+ /*
+ * If dests is empty, there is only one migration list corresponding
+ * to s->target_nid.
+ */
+ if (!dests->nr_dests) {
+ i = 0;
+ goto isolate;
+ }
+
+ order = folio_order(folio);
+ ilx = vma->vm_pgoff >> order;
+ ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);
+
+ for (i = 0; i < dests->nr_dests; i++)
+ weight_total += dests->weight_arr[i];
+
+ /* If the total weights are somehow 0, don't migrate at all */
+ if (!weight_total)
+ return;
+
+ target = ilx % weight_total;
+ for (i = 0; i < dests->nr_dests; i++) {
+ if (target < dests->weight_arr[i])
+ break;
+ target -= dests->weight_arr[i];
+ }
+
+ /* If the folio is already in the right node, don't do anything */
+ if (folio_nid(folio) == dests->node_id_arr[i])
+ return;
+
+isolate:
+ if (!folio_isolate_lru(folio))
+ return;
+
+ list_add(&folio->lru, &migration_lists[i]);
+}
+
+static int damos_va_migrate_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct damos_va_migrate_private *priv = walk->private;
+ struct list_head *migration_lists = priv->migration_lists;
+ struct damos *s = priv->scheme;
+ struct damos_migrate_dests *dests = &s->migrate_dests;
+ struct folio *folio;
+ spinlock_t *ptl;
+ pte_t *start_pte, *pte, ptent;
+ int nr;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ ptl = pmd_trans_huge_lock(pmd, walk->vma);
+ if (ptl) {
+ pmd_t pmde = pmdp_get(pmd);
+
+ if (!pmd_present(pmde))
+ goto huge_out;
+ folio = vm_normal_folio_pmd(walk->vma, addr, pmde);
+ if (!folio)
+ goto huge_out;
+ if (damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd))
+ goto huge_out;
+ damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
+ migration_lists);
+huge_out:
+ spin_unlock(ptl);
+ return 0;
+ }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+ start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte)
+ return 0;
+
+ for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
+ nr = 1;
+ ptent = ptep_get(pte);
+
+ if (pte_none(ptent) || !pte_present(ptent))
+ continue;
+ folio = vm_normal_folio(walk->vma, addr, ptent);
+ if (!folio)
+ continue;
+ if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL))
+ return 0;
+ damos_va_migrate_dests_add(folio, walk->vma, addr, dests,
+ migration_lists);
+ nr = folio_nr_pages(folio);
+ }
+ pte_unmap_unlock(start_pte, ptl);
+ return 0;
+}
+
+/*
+ * Functions for the target validity check and cleanup
+ */
+
+static bool damon_va_target_valid(struct damon_target *t)
+{
+ struct task_struct *task;
+
+ task = damon_get_task_struct(t);
+ if (task) {
+ put_task_struct(task);
+ return true;
+ }
+
+ return false;
+}
+
+static void damon_va_cleanup_target(struct damon_target *t)
+{
+ put_pid(t->pid);
+}
+
+#ifndef CONFIG_ADVISE_SYSCALLS
+static unsigned long damos_madvise(struct damon_target *target,
+ struct damon_region *r, int behavior)
+{
+ return 0;
+}
+#else
+static unsigned long damos_madvise(struct damon_target *target,
+ struct damon_region *r, int behavior)
+{
+ struct mm_struct *mm;
+ unsigned long start = PAGE_ALIGN(r->ar.start);
+ unsigned long len = PAGE_ALIGN(damon_sz_region(r));
+ unsigned long applied;
+
+ mm = damon_get_mm(target);
+ if (!mm)
+ return 0;
+
+ applied = do_madvise(mm, start, len, behavior) ? 0 : len;
+ mmput(mm);
+
+ return applied;
+}
+#endif /* CONFIG_ADVISE_SYSCALLS */
+
+static unsigned long damos_va_migrate(struct damon_target *target,
+ struct damon_region *r, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ LIST_HEAD(folio_list);
+ struct damos_va_migrate_private priv;
+ struct mm_struct *mm;
+ int nr_dests;
+ int nid;
+ bool use_target_nid;
+ unsigned long applied = 0;
+ struct damos_migrate_dests *dests = &s->migrate_dests;
+ struct mm_walk_ops walk_ops = {
+ .pmd_entry = damos_va_migrate_pmd_entry,
+ .pte_entry = NULL,
+ .walk_lock = PGWALK_RDLOCK,
+ };
+
+ use_target_nid = dests->nr_dests == 0;
+ nr_dests = use_target_nid ? 1 : dests->nr_dests;
+ priv.scheme = s;
+ priv.migration_lists = kmalloc_array(nr_dests,
+ sizeof(*priv.migration_lists), GFP_KERNEL);
+ if (!priv.migration_lists)
+ return 0;
+
+ for (int i = 0; i < nr_dests; i++)
+ INIT_LIST_HEAD(&priv.migration_lists[i]);
+
+
+ mm = damon_get_mm(target);
+ if (!mm)
+ goto free_lists;
+
+ mmap_read_lock(mm);
+ walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
+ mmap_read_unlock(mm);
+ mmput(mm);
+
+ for (int i = 0; i < nr_dests; i++) {
+ nid = use_target_nid ? s->target_nid : dests->node_id_arr[i];
+ applied += damon_migrate_pages(&priv.migration_lists[i], nid);
+ cond_resched();
+ }
+
+free_lists:
+ kfree(priv.migration_lists);
+ return applied * PAGE_SIZE;
+}
+
+struct damos_va_stat_private {
+ struct damos *scheme;
+ unsigned long *sz_filter_passed;
+};
+
+static inline bool damos_va_invalid_folio(struct folio *folio,
+ struct damos *s)
+{
+ return !folio || folio == s->last_applied;
+}
+
+static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct damos_va_stat_private *priv = walk->private;
+ struct damos *s = priv->scheme;
+ unsigned long *sz_filter_passed = priv->sz_filter_passed;
+ struct vm_area_struct *vma = walk->vma;
+ struct folio *folio;
+ spinlock_t *ptl;
+ pte_t *start_pte, *pte, ptent;
+ int nr;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
+ pmd_t pmde = pmdp_get(pmd);
+
+ if (!pmd_present(pmde))
+ goto huge_unlock;
+
+ folio = vm_normal_folio_pmd(vma, addr, pmde);
+
+ if (damos_va_invalid_folio(folio, s))
+ goto huge_unlock;
+
+ if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd))
+ *sz_filter_passed += folio_size(folio);
+ s->last_applied = folio;
+
+huge_unlock:
+ spin_unlock(ptl);
+ return 0;
+ }
+#endif
+ start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ if (!start_pte)
+ return 0;
+
+ for (; addr < next; pte += nr, addr += nr * PAGE_SIZE) {
+ nr = 1;
+ ptent = ptep_get(pte);
+
+ if (pte_none(ptent) || !pte_present(ptent))
+ continue;
+
+ folio = vm_normal_folio(vma, addr, ptent);
+
+ if (damos_va_invalid_folio(folio, s))
+ continue;
+
+ if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL))
+ *sz_filter_passed += folio_size(folio);
+ nr = folio_nr_pages(folio);
+ s->last_applied = folio;
+ }
+ pte_unmap_unlock(start_pte, ptl);
+ return 0;
+}
+
+static unsigned long damos_va_stat(struct damon_target *target,
+ struct damon_region *r, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ struct damos_va_stat_private priv;
+ struct mm_struct *mm;
+ struct mm_walk_ops walk_ops = {
+ .pmd_entry = damos_va_stat_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
+ };
+
+ priv.scheme = s;
+ priv.sz_filter_passed = sz_filter_passed;
+
+ if (!damos_ops_has_filter(s))
+ return 0;
+
+ mm = damon_get_mm(target);
+ if (!mm)
+ return 0;
+
+ mmap_read_lock(mm);
+ walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
+ mmap_read_unlock(mm);
+ mmput(mm);
+ return 0;
+}
+
+static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme, unsigned long *sz_filter_passed)
+{
+ int madv_action;
+
+ switch (scheme->action) {
+ case DAMOS_WILLNEED:
+ madv_action = MADV_WILLNEED;
+ break;
+ case DAMOS_COLD:
+ madv_action = MADV_COLD;
+ break;
+ case DAMOS_PAGEOUT:
+ madv_action = MADV_PAGEOUT;
+ break;
+ case DAMOS_HUGEPAGE:
+ madv_action = MADV_HUGEPAGE;
+ break;
+ case DAMOS_NOHUGEPAGE:
+ madv_action = MADV_NOHUGEPAGE;
+ break;
+ case DAMOS_MIGRATE_HOT:
+ case DAMOS_MIGRATE_COLD:
+ return damos_va_migrate(t, r, scheme, sz_filter_passed);
+ case DAMOS_STAT:
+ return damos_va_stat(t, r, scheme, sz_filter_passed);
+ default:
+ /*
+ * DAMOS actions that are not yet supported by 'vaddr'.
+ */
+ return 0;
+ }
+
+ return damos_madvise(t, r, madv_action);
+}
+
+static int damon_va_scheme_score(struct damon_ctx *context,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
+{
+
+ switch (scheme->action) {
+ case DAMOS_PAGEOUT:
+ return damon_cold_score(context, r, scheme);
+ case DAMOS_MIGRATE_HOT:
+ return damon_hot_score(context, r, scheme);
+ case DAMOS_MIGRATE_COLD:
+ return damon_cold_score(context, r, scheme);
+ default:
+ break;
+ }
+
+ return DAMOS_MAX_SCORE;
+}
+
+static int __init damon_va_initcall(void)
+{
+ struct damon_operations ops = {
+ .id = DAMON_OPS_VADDR,
+ .init = damon_va_init,
+ .update = damon_va_update,
+ .prepare_access_checks = damon_va_prepare_access_checks,
+ .check_accesses = damon_va_check_accesses,
+ .target_valid = damon_va_target_valid,
+ .cleanup_target = damon_va_cleanup_target,
+ .cleanup = NULL,
+ .apply_scheme = damon_va_apply_scheme,
+ .get_scheme_score = damon_va_scheme_score,
+ };
+ /* ops for fixed virtual address ranges */
+ struct damon_operations ops_fvaddr = ops;
+ int err;
+
+ /* Don't set the monitoring target regions for the entire mapping */
+ ops_fvaddr.id = DAMON_OPS_FVADDR;
+ ops_fvaddr.init = NULL;
+ ops_fvaddr.update = NULL;
+
+ err = damon_register_ops(&ops);
+ if (err)
+ return err;
+ return damon_register_ops(&ops_fvaddr);
+};
+
+subsys_initcall(damon_va_initcall);
+
+#include "tests/vaddr-kunit.h"