Diffstat (limited to 'mm/damon/core.c')
 mm/damon/core.c | 1860
 1 file changed, 1722 insertions, 138 deletions
diff --git a/mm/damon/core.c b/mm/damon/core.c index ceec75b88ef9..f9fc0375890a 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2,7 +2,7 @@ /* * Data Access Monitor * - * Author: SeongJae Park <sjpark@amazon.de> + * Author: SeongJae Park <sj@kernel.org> */ #define pr_fmt(fmt) "damon: " fmt @@ -10,18 +10,16 @@ #include <linux/damon.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/memcontrol.h> #include <linux/mm.h> +#include <linux/psi.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/string_choices.h> #define CREATE_TRACE_POINTS #include <trace/events/damon.h> -#ifdef CONFIG_DAMON_KUNIT_TEST -#undef DAMON_MIN_REGION -#define DAMON_MIN_REGION 1 -#endif - static DEFINE_MUTEX(damon_lock); static int nr_running_ctxs; static bool running_exclusive_ctxs; @@ -74,14 +72,13 @@ int damon_register_ops(struct damon_operations *ops) if (ops->id >= NR_DAMON_OPS) return -EINVAL; + mutex_lock(&damon_ops_lock); /* Fail for already registered ops */ - if (__damon_is_registered_ops(ops->id)) { + if (__damon_is_registered_ops(ops->id)) err = -EINVAL; - goto out; - } - damon_registered_ops[ops->id] = *ops; -out: + else + damon_registered_ops[ops->id] = *ops; mutex_unlock(&damon_ops_lock); return err; } @@ -128,6 +125,7 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end) region->ar.start = start; region->ar.end = end; region->nr_accesses = 0; + region->nr_accesses_bp = 0; INIT_LIST_HEAD(®ion->list); region->age = 0; @@ -199,6 +197,7 @@ static int damon_fill_regions_holes(struct damon_region *first, * @t: the given target. * @ranges: array of new monitoring target ranges. * @nr_ranges: length of @ranges. + * @min_sz_region: minimum region size. * * This function adds new regions to, or modify existing regions of a * monitoring target to fit in specific ranges. @@ -206,7 +205,7 @@ static int damon_fill_regions_holes(struct damon_region *first, * Return: 0 if success, or negative error code otherwise. 
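 *
 * An illustrative usage sketch (illustrative only; @t, user_start and
 * user_end are placeholders):
 *
 *	struct damon_addr_range range = {
 *		.start = user_start,
 *		.end = user_end,
 *	};
 *	int err = damon_set_regions(t, &range, 1, DAMON_MIN_REGION);
 *
 *	if (err)
 *		return err;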
*/ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, - unsigned int nr_ranges) + unsigned int nr_ranges, unsigned long min_sz_region) { struct damon_region *r, *next; unsigned int i; @@ -243,16 +242,16 @@ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, /* no region intersects with this range */ newr = damon_new_region( ALIGN_DOWN(range->start, - DAMON_MIN_REGION), - ALIGN(range->end, DAMON_MIN_REGION)); + min_sz_region), + ALIGN(range->end, min_sz_region)); if (!newr) return -ENOMEM; damon_insert_region(newr, damon_prev_region(r), r, t); } else { /* resize intersecting regions to fit in this range */ first->ar.start = ALIGN_DOWN(range->start, - DAMON_MIN_REGION); - last->ar.end = ALIGN(range->end, DAMON_MIN_REGION); + min_sz_region); + last->ar.end = ALIGN(range->end, min_sz_region); /* fill possible holes in the range */ err = damon_fill_regions_holes(first, last, t); @@ -263,22 +262,120 @@ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, return 0; } -/* initialize private fields of damos_quota and return the pointer */ -static struct damos_quota *damos_quota_init_priv(struct damos_quota *quota) +struct damos_filter *damos_new_filter(enum damos_filter_type type, + bool matching, bool allow) +{ + struct damos_filter *filter; + + filter = kmalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return NULL; + filter->type = type; + filter->matching = matching; + filter->allow = allow; + INIT_LIST_HEAD(&filter->list); + return filter; +} + +/** + * damos_filter_for_ops() - Return if the filter is ops-hndled one. + * @type: type of the filter. + * + * Return: true if the filter of @type needs to be handled by ops layer, false + * otherwise. + */ +bool damos_filter_for_ops(enum damos_filter_type type) +{ + switch (type) { + case DAMOS_FILTER_TYPE_ADDR: + case DAMOS_FILTER_TYPE_TARGET: + return false; + default: + break; + } + return true; +} + +void damos_add_filter(struct damos *s, struct damos_filter *f) +{ + if (damos_filter_for_ops(f->type)) + list_add_tail(&f->list, &s->ops_filters); + else + list_add_tail(&f->list, &s->core_filters); +} + +static void damos_del_filter(struct damos_filter *f) +{ + list_del(&f->list); +} + +static void damos_free_filter(struct damos_filter *f) +{ + kfree(f); +} + +void damos_destroy_filter(struct damos_filter *f) +{ + damos_del_filter(f); + damos_free_filter(f); +} + +struct damos_quota_goal *damos_new_quota_goal( + enum damos_quota_goal_metric metric, + unsigned long target_value) +{ + struct damos_quota_goal *goal; + + goal = kmalloc(sizeof(*goal), GFP_KERNEL); + if (!goal) + return NULL; + goal->metric = metric; + goal->target_value = target_value; + INIT_LIST_HEAD(&goal->list); + return goal; +} + +void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g) +{ + list_add_tail(&g->list, &q->goals); +} + +static void damos_del_quota_goal(struct damos_quota_goal *g) { + list_del(&g->list); +} + +static void damos_free_quota_goal(struct damos_quota_goal *g) +{ + kfree(g); +} + +void damos_destroy_quota_goal(struct damos_quota_goal *g) +{ + damos_del_quota_goal(g); + damos_free_quota_goal(g); +} + +/* initialize fields of @quota that normally API users wouldn't set */ +static struct damos_quota *damos_quota_init(struct damos_quota *quota) +{ + quota->esz = 0; quota->total_charged_sz = 0; quota->total_charged_ns = 0; - quota->esz = 0; quota->charged_sz = 0; quota->charged_from = 0; quota->charge_target_from = NULL; quota->charge_addr_from = 0; + 
quota->esz_bp = 0; return quota; } struct damos *damon_new_scheme(struct damos_access_pattern *pattern, - enum damos_action action, struct damos_quota *quota, - struct damos_watermarks *wmarks) + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, + struct damos_watermarks *wmarks, + int target_nid) { struct damos *scheme; @@ -287,20 +384,47 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern, return NULL; scheme->pattern = *pattern; scheme->action = action; + scheme->apply_interval_us = apply_interval_us; + /* + * next_apply_sis will be set when kdamond starts. While kdamond is + * running, it will also updated when it is added to the DAMON context, + * or damon_attrs are updated. + */ + scheme->next_apply_sis = 0; + scheme->walk_completed = false; + INIT_LIST_HEAD(&scheme->core_filters); + INIT_LIST_HEAD(&scheme->ops_filters); scheme->stat = (struct damos_stat){}; INIT_LIST_HEAD(&scheme->list); - scheme->quota = *(damos_quota_init_priv(quota)); + scheme->quota = *(damos_quota_init(quota)); + /* quota.goals should be separately set by caller */ + INIT_LIST_HEAD(&scheme->quota.goals); scheme->wmarks = *wmarks; scheme->wmarks.activated = true; + scheme->migrate_dests = (struct damos_migrate_dests){}; + scheme->target_nid = target_nid; + return scheme; } +static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + unsigned long apply_interval = s->apply_interval_us ? + s->apply_interval_us : ctx->attrs.aggr_interval; + + s->next_apply_sis = ctx->passed_sample_intervals + + apply_interval / sample_interval; +} + void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) { list_add_tail(&s->list, &ctx->schemes); + damos_set_next_apply_sis(s, ctx); } static void damon_del_scheme(struct damos *s) @@ -315,6 +439,20 @@ static void damon_free_scheme(struct damos *s) void damon_destroy_scheme(struct damos *s) { + struct damos_quota_goal *g, *g_next; + struct damos_filter *f, *next; + + damos_for_each_quota_goal_safe(g, g_next, &s->quota) + damos_destroy_quota_goal(g); + + damos_for_each_core_filter_safe(f, next, s) + damos_destroy_filter(f); + + damos_for_each_ops_filter_safe(f, next, s) + damos_destroy_filter(f); + + kfree(s->migrate_dests.node_id_arr); + kfree(s->migrate_dests.weight_arr); damon_del_scheme(s); damon_free_scheme(s); } @@ -336,6 +474,7 @@ struct damon_target *damon_new_target(void) t->nr_regions = 0; INIT_LIST_HEAD(&t->regions_list); INIT_LIST_HEAD(&t->list); + t->obsolete = false; return t; } @@ -364,8 +503,12 @@ void damon_free_target(struct damon_target *t) kfree(t); } -void damon_destroy_target(struct damon_target *t) +void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx) { + + if (ctx && ctx->ops.cleanup_target) + ctx->ops.cleanup_target(t); + damon_del_target(t); damon_free_target(t); } @@ -383,18 +526,28 @@ struct damon_ctx *damon_new_ctx(void) if (!ctx) return NULL; + init_completion(&ctx->kdamond_started); + ctx->attrs.sample_interval = 5 * 1000; ctx->attrs.aggr_interval = 100 * 1000; ctx->attrs.ops_update_interval = 60 * 1000 * 1000; - ktime_get_coarse_ts64(&ctx->last_aggregation); - ctx->last_ops_update = ctx->last_aggregation; + ctx->passed_sample_intervals = 0; + /* These will be set from kdamond_init_ctx() */ + ctx->next_aggregation_sis = 0; + ctx->next_ops_update_sis = 0; mutex_init(&ctx->kdamond_lock); + INIT_LIST_HEAD(&ctx->call_controls); + 
mutex_init(&ctx->call_controls_lock); + mutex_init(&ctx->walk_control_lock); ctx->attrs.min_nr_regions = 10; ctx->attrs.max_nr_regions = 1000; + ctx->addr_unit = 1; + ctx->min_sz_region = DAMON_MIN_REGION; + INIT_LIST_HEAD(&ctx->adaptive_targets); INIT_LIST_HEAD(&ctx->schemes); @@ -405,13 +558,8 @@ static void damon_destroy_targets(struct damon_ctx *ctx) { struct damon_target *t, *next_t; - if (ctx->ops.cleanup) { - ctx->ops.cleanup(ctx); - return; - } - damon_for_each_target_safe(t, next_t, ctx) - damon_destroy_target(t); + damon_destroy_target(t, ctx); } void damon_destroy_ctx(struct damon_ctx *ctx) @@ -426,24 +574,172 @@ void damon_destroy_ctx(struct damon_ctx *ctx) kfree(ctx); } +static bool damon_attrs_equals(const struct damon_attrs *attrs1, + const struct damon_attrs *attrs2) +{ + const struct damon_intervals_goal *ig1 = &attrs1->intervals_goal; + const struct damon_intervals_goal *ig2 = &attrs2->intervals_goal; + + return attrs1->sample_interval == attrs2->sample_interval && + attrs1->aggr_interval == attrs2->aggr_interval && + attrs1->ops_update_interval == attrs2->ops_update_interval && + attrs1->min_nr_regions == attrs2->min_nr_regions && + attrs1->max_nr_regions == attrs2->max_nr_regions && + ig1->access_bp == ig2->access_bp && + ig1->aggrs == ig2->aggrs && + ig1->min_sample_us == ig2->min_sample_us && + ig1->max_sample_us == ig2->max_sample_us; +} + +static unsigned int damon_age_for_new_attrs(unsigned int age, + struct damon_attrs *old_attrs, struct damon_attrs *new_attrs) +{ + return age * old_attrs->aggr_interval / new_attrs->aggr_interval; +} + +/* convert access ratio in bp (per 10,000) to nr_accesses */ +static unsigned int damon_accesses_bp_to_nr_accesses( + unsigned int accesses_bp, struct damon_attrs *attrs) +{ + return accesses_bp * damon_max_nr_accesses(attrs) / 10000; +} + +/* + * Convert nr_accesses to access ratio in bp (per 10,000). + * + * Callers should ensure attrs.aggr_interval is not zero, like + * damon_update_monitoring_results() does . Otherwise, divide-by-zero would + * happen. + */ +static unsigned int damon_nr_accesses_to_accesses_bp( + unsigned int nr_accesses, struct damon_attrs *attrs) +{ + return nr_accesses * 10000 / damon_max_nr_accesses(attrs); +} + +static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses, + struct damon_attrs *old_attrs, struct damon_attrs *new_attrs) +{ + return damon_accesses_bp_to_nr_accesses( + damon_nr_accesses_to_accesses_bp( + nr_accesses, old_attrs), + new_attrs); +} + +static void damon_update_monitoring_result(struct damon_region *r, + struct damon_attrs *old_attrs, struct damon_attrs *new_attrs, + bool aggregating) +{ + if (!aggregating) { + r->nr_accesses = damon_nr_accesses_for_new_attrs( + r->nr_accesses, old_attrs, new_attrs); + r->nr_accesses_bp = r->nr_accesses * 10000; + } else { + /* + * if this is called in the middle of the aggregation, reset + * the aggregations we made so far for this aggregation + * interval. In other words, make the status like + * kdamond_reset_aggregated() is called. + */ + r->last_nr_accesses = damon_nr_accesses_for_new_attrs( + r->last_nr_accesses, old_attrs, new_attrs); + r->nr_accesses_bp = r->last_nr_accesses * 10000; + r->nr_accesses = 0; + } + r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs); +} + +/* + * region->nr_accesses is the number of sampling intervals in the last + * aggregation interval that access to the region has found, and region->age is + * the number of aggregation intervals that its access pattern has maintained. 
+ * For the reason, the real meaning of the two fields depend on current + * sampling interval and aggregation interval. This function updates + * ->nr_accesses and ->age of given damon_ctx's regions for new damon_attrs. + */ +static void damon_update_monitoring_results(struct damon_ctx *ctx, + struct damon_attrs *new_attrs, bool aggregating) +{ + struct damon_attrs *old_attrs = &ctx->attrs; + struct damon_target *t; + struct damon_region *r; + + /* if any interval is zero, simply forgive conversion */ + if (!old_attrs->sample_interval || !old_attrs->aggr_interval || + !new_attrs->sample_interval || + !new_attrs->aggr_interval) + return; + + damon_for_each_target(t, ctx) + damon_for_each_region(r, t) + damon_update_monitoring_result( + r, old_attrs, new_attrs, aggregating); +} + +/* + * damon_valid_intervals_goal() - return if the intervals goal of @attrs is + * valid. + */ +static bool damon_valid_intervals_goal(struct damon_attrs *attrs) +{ + struct damon_intervals_goal *goal = &attrs->intervals_goal; + + /* tuning is disabled */ + if (!goal->aggrs) + return true; + if (goal->min_sample_us > goal->max_sample_us) + return false; + if (attrs->sample_interval < goal->min_sample_us || + goal->max_sample_us < attrs->sample_interval) + return false; + return true; +} + /** * damon_set_attrs() - Set attributes for the monitoring. * @ctx: monitoring context * @attrs: monitoring attributes * - * This function should not be called while the kdamond is running. + * This function should be called while the kdamond is not running, an access + * check results aggregation is not ongoing (e.g., from damon_call(). + * * Every time interval is in micro-seconds. * * Return: 0 on success, negative error code otherwise. */ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) { + unsigned long sample_interval = attrs->sample_interval ? + attrs->sample_interval : 1; + struct damos *s; + bool aggregating = ctx->passed_sample_intervals < + ctx->next_aggregation_sis; + + if (!damon_valid_intervals_goal(attrs)) + return -EINVAL; + if (attrs->min_nr_regions < 3) return -EINVAL; if (attrs->min_nr_regions > attrs->max_nr_regions) return -EINVAL; + if (attrs->sample_interval > attrs->aggr_interval) + return -EINVAL; + + /* calls from core-external doesn't set this. 
*/ + if (!attrs->aggr_samples) + attrs->aggr_samples = attrs->aggr_interval / sample_interval; + + ctx->next_aggregation_sis = ctx->passed_sample_intervals + + attrs->aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->passed_sample_intervals + + attrs->ops_update_interval / sample_interval; + damon_update_monitoring_results(ctx, attrs, aggregating); ctx->attrs = *attrs; + + damon_for_each_scheme(s, ctx) + damos_set_next_apply_sis(s, ctx); + return 0; } @@ -468,6 +764,508 @@ void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, damon_add_scheme(ctx, schemes[i]); } +static struct damos_quota_goal *damos_nth_quota_goal( + int n, struct damos_quota *q) +{ + struct damos_quota_goal *goal; + int i = 0; + + damos_for_each_quota_goal(goal, q) { + if (i++ == n) + return goal; + } + return NULL; +} + +static void damos_commit_quota_goal_union( + struct damos_quota_goal *dst, struct damos_quota_goal *src) +{ + switch (dst->metric) { + case DAMOS_QUOTA_NODE_MEM_USED_BP: + case DAMOS_QUOTA_NODE_MEM_FREE_BP: + dst->nid = src->nid; + break; + case DAMOS_QUOTA_NODE_MEMCG_USED_BP: + case DAMOS_QUOTA_NODE_MEMCG_FREE_BP: + dst->nid = src->nid; + dst->memcg_id = src->memcg_id; + break; + default: + break; + } +} + +static void damos_commit_quota_goal( + struct damos_quota_goal *dst, struct damos_quota_goal *src) +{ + dst->metric = src->metric; + dst->target_value = src->target_value; + if (dst->metric == DAMOS_QUOTA_USER_INPUT) + dst->current_value = src->current_value; + /* keep last_psi_total as is, since it will be updated in next cycle */ + damos_commit_quota_goal_union(dst, src); +} + +/** + * damos_commit_quota_goals() - Commit DAMOS quota goals to another quota. + * @dst: The commit destination DAMOS quota. + * @src: The commit source DAMOS quota. + * + * Copies user-specified parameters for quota goals from @src to @dst. Users + * should use this function for quota goals-level parameters update of running + * DAMON contexts, instead of manual in-place updates. + * + * This function should be called from parameters-update safe context, like + * damon_call(). 
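 *
 * A minimal sketch of the intended flow (illustrative only; new_quota and
 * running_scheme are placeholders, and new_quota.goals is assumed to have
 * been initialized with INIT_LIST_HEAD()):
 *
 *	struct damos_quota_goal *goal;
 *	int err;
 *
 *	goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US, 100);
 *	if (!goal)
 *		return -ENOMEM;
 *	damos_add_quota_goal(&new_quota, goal);
 *	err = damos_commit_quota_goals(&running_scheme->quota, &new_quota);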
+ */ +int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src) +{ + struct damos_quota_goal *dst_goal, *next, *src_goal, *new_goal; + int i = 0, j = 0; + + damos_for_each_quota_goal_safe(dst_goal, next, dst) { + src_goal = damos_nth_quota_goal(i++, src); + if (src_goal) + damos_commit_quota_goal(dst_goal, src_goal); + else + damos_destroy_quota_goal(dst_goal); + } + damos_for_each_quota_goal_safe(src_goal, next, src) { + if (j++ < i) + continue; + new_goal = damos_new_quota_goal( + src_goal->metric, src_goal->target_value); + if (!new_goal) + return -ENOMEM; + damos_commit_quota_goal(new_goal, src_goal); + damos_add_quota_goal(dst, new_goal); + } + return 0; +} + +static int damos_commit_quota(struct damos_quota *dst, struct damos_quota *src) +{ + int err; + + dst->reset_interval = src->reset_interval; + dst->ms = src->ms; + dst->sz = src->sz; + err = damos_commit_quota_goals(dst, src); + if (err) + return err; + dst->weight_sz = src->weight_sz; + dst->weight_nr_accesses = src->weight_nr_accesses; + dst->weight_age = src->weight_age; + return 0; +} + +static struct damos_filter *damos_nth_core_filter(int n, struct damos *s) +{ + struct damos_filter *filter; + int i = 0; + + damos_for_each_core_filter(filter, s) { + if (i++ == n) + return filter; + } + return NULL; +} + +static struct damos_filter *damos_nth_ops_filter(int n, struct damos *s) +{ + struct damos_filter *filter; + int i = 0; + + damos_for_each_ops_filter(filter, s) { + if (i++ == n) + return filter; + } + return NULL; +} + +static void damos_commit_filter_arg( + struct damos_filter *dst, struct damos_filter *src) +{ + switch (dst->type) { + case DAMOS_FILTER_TYPE_MEMCG: + dst->memcg_id = src->memcg_id; + break; + case DAMOS_FILTER_TYPE_ADDR: + dst->addr_range = src->addr_range; + break; + case DAMOS_FILTER_TYPE_TARGET: + dst->target_idx = src->target_idx; + break; + case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: + dst->sz_range = src->sz_range; + break; + default: + break; + } +} + +static void damos_commit_filter( + struct damos_filter *dst, struct damos_filter *src) +{ + dst->type = src->type; + dst->matching = src->matching; + dst->allow = src->allow; + damos_commit_filter_arg(dst, src); +} + +static int damos_commit_core_filters(struct damos *dst, struct damos *src) +{ + struct damos_filter *dst_filter, *next, *src_filter, *new_filter; + int i = 0, j = 0; + + damos_for_each_core_filter_safe(dst_filter, next, dst) { + src_filter = damos_nth_core_filter(i++, src); + if (src_filter) + damos_commit_filter(dst_filter, src_filter); + else + damos_destroy_filter(dst_filter); + } + + damos_for_each_core_filter_safe(src_filter, next, src) { + if (j++ < i) + continue; + + new_filter = damos_new_filter( + src_filter->type, src_filter->matching, + src_filter->allow); + if (!new_filter) + return -ENOMEM; + damos_commit_filter_arg(new_filter, src_filter); + damos_add_filter(dst, new_filter); + } + return 0; +} + +static int damos_commit_ops_filters(struct damos *dst, struct damos *src) +{ + struct damos_filter *dst_filter, *next, *src_filter, *new_filter; + int i = 0, j = 0; + + damos_for_each_ops_filter_safe(dst_filter, next, dst) { + src_filter = damos_nth_ops_filter(i++, src); + if (src_filter) + damos_commit_filter(dst_filter, src_filter); + else + damos_destroy_filter(dst_filter); + } + + damos_for_each_ops_filter_safe(src_filter, next, src) { + if (j++ < i) + continue; + + new_filter = damos_new_filter( + src_filter->type, src_filter->matching, + src_filter->allow); + if (!new_filter) + return -ENOMEM; + 
damos_commit_filter_arg(new_filter, src_filter); + damos_add_filter(dst, new_filter); + } + return 0; +} + +/** + * damos_filters_default_reject() - decide whether to reject memory that didn't + * match with any given filter. + * @filters: Given DAMOS filters of a group. + */ +static bool damos_filters_default_reject(struct list_head *filters) +{ + struct damos_filter *last_filter; + + if (list_empty(filters)) + return false; + last_filter = list_last_entry(filters, struct damos_filter, list); + return last_filter->allow; +} + +static void damos_set_filters_default_reject(struct damos *s) +{ + if (!list_empty(&s->ops_filters)) + s->core_filters_default_reject = false; + else + s->core_filters_default_reject = + damos_filters_default_reject(&s->core_filters); + s->ops_filters_default_reject = + damos_filters_default_reject(&s->ops_filters); +} + +static int damos_commit_dests(struct damos_migrate_dests *dst, + struct damos_migrate_dests *src) +{ + if (dst->nr_dests != src->nr_dests) { + kfree(dst->node_id_arr); + kfree(dst->weight_arr); + + dst->node_id_arr = kmalloc_array(src->nr_dests, + sizeof(*dst->node_id_arr), GFP_KERNEL); + if (!dst->node_id_arr) { + dst->weight_arr = NULL; + return -ENOMEM; + } + + dst->weight_arr = kmalloc_array(src->nr_dests, + sizeof(*dst->weight_arr), GFP_KERNEL); + if (!dst->weight_arr) { + /* ->node_id_arr will be freed by scheme destruction */ + return -ENOMEM; + } + } + + dst->nr_dests = src->nr_dests; + for (int i = 0; i < src->nr_dests; i++) { + dst->node_id_arr[i] = src->node_id_arr[i]; + dst->weight_arr[i] = src->weight_arr[i]; + } + + return 0; +} + +static int damos_commit_filters(struct damos *dst, struct damos *src) +{ + int err; + + err = damos_commit_core_filters(dst, src); + if (err) + return err; + err = damos_commit_ops_filters(dst, src); + if (err) + return err; + damos_set_filters_default_reject(dst); + return 0; +} + +static struct damos *damon_nth_scheme(int n, struct damon_ctx *ctx) +{ + struct damos *s; + int i = 0; + + damon_for_each_scheme(s, ctx) { + if (i++ == n) + return s; + } + return NULL; +} + +static int damos_commit(struct damos *dst, struct damos *src) +{ + int err; + + dst->pattern = src->pattern; + dst->action = src->action; + dst->apply_interval_us = src->apply_interval_us; + + err = damos_commit_quota(&dst->quota, &src->quota); + if (err) + return err; + + dst->wmarks = src->wmarks; + dst->target_nid = src->target_nid; + + err = damos_commit_dests(&dst->migrate_dests, &src->migrate_dests); + if (err) + return err; + + err = damos_commit_filters(dst, src); + return err; +} + +static int damon_commit_schemes(struct damon_ctx *dst, struct damon_ctx *src) +{ + struct damos *dst_scheme, *next, *src_scheme, *new_scheme; + int i = 0, j = 0, err; + + damon_for_each_scheme_safe(dst_scheme, next, dst) { + src_scheme = damon_nth_scheme(i++, src); + if (src_scheme) { + err = damos_commit(dst_scheme, src_scheme); + if (err) + return err; + } else { + damon_destroy_scheme(dst_scheme); + } + } + + damon_for_each_scheme_safe(src_scheme, next, src) { + if (j++ < i) + continue; + new_scheme = damon_new_scheme(&src_scheme->pattern, + src_scheme->action, + src_scheme->apply_interval_us, + &src_scheme->quota, &src_scheme->wmarks, + NUMA_NO_NODE); + if (!new_scheme) + return -ENOMEM; + err = damos_commit(new_scheme, src_scheme); + if (err) { + damon_destroy_scheme(new_scheme); + return err; + } + damon_add_scheme(dst, new_scheme); + } + return 0; +} + +static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx) +{ + struct 
damon_target *t; + int i = 0; + + damon_for_each_target(t, ctx) { + if (i++ == n) + return t; + } + return NULL; +} + +/* + * The caller should ensure the regions of @src are + * 1. valid (end >= src) and + * 2. sorted by starting address. + * + * If @src has no region, @dst keeps current regions. + */ +static int damon_commit_target_regions(struct damon_target *dst, + struct damon_target *src, unsigned long src_min_sz_region) +{ + struct damon_region *src_region; + struct damon_addr_range *ranges; + int i = 0, err; + + damon_for_each_region(src_region, src) + i++; + if (!i) + return 0; + + ranges = kmalloc_array(i, sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN); + if (!ranges) + return -ENOMEM; + i = 0; + damon_for_each_region(src_region, src) + ranges[i++] = src_region->ar; + err = damon_set_regions(dst, ranges, i, src_min_sz_region); + kfree(ranges); + return err; +} + +static int damon_commit_target( + struct damon_target *dst, bool dst_has_pid, + struct damon_target *src, bool src_has_pid, + unsigned long src_min_sz_region) +{ + int err; + + err = damon_commit_target_regions(dst, src, src_min_sz_region); + if (err) + return err; + if (dst_has_pid) + put_pid(dst->pid); + if (src_has_pid) + get_pid(src->pid); + dst->pid = src->pid; + return 0; +} + +static int damon_commit_targets( + struct damon_ctx *dst, struct damon_ctx *src) +{ + struct damon_target *dst_target, *next, *src_target, *new_target; + int i = 0, j = 0, err; + + damon_for_each_target_safe(dst_target, next, dst) { + src_target = damon_nth_target(i++, src); + /* + * If src target is obsolete, do not commit the parameters to + * the dst target, and further remove the dst target. + */ + if (src_target && !src_target->obsolete) { + err = damon_commit_target( + dst_target, damon_target_has_pid(dst), + src_target, damon_target_has_pid(src), + src->min_sz_region); + if (err) + return err; + } else { + struct damos *s; + + damon_destroy_target(dst_target, dst); + damon_for_each_scheme(s, dst) { + if (s->quota.charge_target_from == dst_target) { + s->quota.charge_target_from = NULL; + s->quota.charge_addr_from = 0; + } + } + } + } + + damon_for_each_target_safe(src_target, next, src) { + if (j++ < i) + continue; + /* target to remove has no matching dst */ + if (src_target->obsolete) + return -EINVAL; + new_target = damon_new_target(); + if (!new_target) + return -ENOMEM; + err = damon_commit_target(new_target, false, + src_target, damon_target_has_pid(src), + src->min_sz_region); + if (err) { + damon_destroy_target(new_target, NULL); + return err; + } + damon_add_target(dst, new_target); + } + return 0; +} + +/** + * damon_commit_ctx() - Commit parameters of a DAMON context to another. + * @dst: The commit destination DAMON context. + * @src: The commit source DAMON context. + * + * This function copies user-specified parameters from @src to @dst and update + * the internal status and results accordingly. Users should use this function + * for context-level parameters update of running context, instead of manual + * in-place updates. + * + * This function should be called from parameters-update safe context, like + * damon_call(). + */ +int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) +{ + int err; + + err = damon_commit_schemes(dst, src); + if (err) + return err; + err = damon_commit_targets(dst, src); + if (err) + return err; + /* + * schemes and targets should be updated first, since + * 1. damon_set_attrs() updates monitoring results of targets and + * next_apply_sis of schemes, and + * 2. 
ops update should be done after pid handling is done (target + * committing require putting pids). + */ + if (!damon_attrs_equals(&dst->attrs, &src->attrs)) { + err = damon_set_attrs(dst, &src->attrs); + if (err) + return err; + } + dst->ops = src->ops; + dst->addr_unit = src->addr_unit; + dst->min_sz_region = src->min_sz_region; + + return 0; +} + /** * damon_nr_running_ctxs() - Return number of currently running contexts. */ @@ -496,8 +1294,8 @@ static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) if (ctx->attrs.min_nr_regions) sz /= ctx->attrs.min_nr_regions; - if (sz < DAMON_MIN_REGION) - sz = DAMON_MIN_REGION; + if (sz < ctx->min_sz_region) + sz = ctx->min_sz_region; return sz; } @@ -519,11 +1317,14 @@ static int __damon_start(struct damon_ctx *ctx) mutex_lock(&ctx->kdamond_lock); if (!ctx->kdamond) { err = 0; + reinit_completion(&ctx->kdamond_started); ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", nr_running_ctxs); if (IS_ERR(ctx->kdamond)) { err = PTR_ERR(ctx->kdamond); ctx->kdamond = NULL; + } else { + wait_for_completion(&ctx->kdamond_started); } } mutex_unlock(&ctx->kdamond_lock); @@ -586,8 +1387,7 @@ static int __damon_stop(struct damon_ctx *ctx) if (tsk) { get_task_struct(tsk); mutex_unlock(&ctx->kdamond_lock); - kthread_stop(tsk); - put_task_struct(tsk); + kthread_stop_put(tsk); return 0; } mutex_unlock(&ctx->kdamond_lock); @@ -615,36 +1415,112 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) return err; } -/* - * damon_check_reset_time_interval() - Check if a time interval is elapsed. - * @baseline: the time to check whether the interval has elapsed since - * @interval: the time interval (microseconds) +/** + * damon_is_running() - Returns if a given DAMON context is running. + * @ctx: The DAMON context to see if running. * - * See whether the given time interval has passed since the given baseline - * time. If so, it also updates the baseline to current time for next check. + * Return: true if @ctx is running, false otherwise. + */ +bool damon_is_running(struct damon_ctx *ctx) +{ + bool running; + + mutex_lock(&ctx->kdamond_lock); + running = ctx->kdamond != NULL; + mutex_unlock(&ctx->kdamond_lock); + return running; +} + +/** + * damon_call() - Invoke a given function on DAMON worker thread (kdamond). + * @ctx: DAMON context to call the function for. + * @control: Control variable of the call request. + * + * Ask DAMON worker thread (kdamond) of @ctx to call a function with an + * argument data that respectively passed via &damon_call_control->fn and + * &damon_call_control->data of @control. If &damon_call_control->repeat of + * @control is unset, further wait until the kdamond finishes handling of the + * request. Otherwise, return as soon as the request is made. * - * Return: true if the time interval has passed, or false otherwise. + * The kdamond executes the function with the argument in the main loop, just + * after a sampling of the iteration is finished. The function can hence + * safely access the internal data of the &struct damon_ctx without additional + * synchronization. The return value of the function will be saved in + * &damon_call_control->return_code. + * + * Return: 0 on success, negative error code otherwise. 
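 *
 * An illustrative usage sketch (names other than the DAMON symbols are
 * placeholders):
 *
 *	struct count_data {
 *		struct damon_ctx *ctx;
 *		int nr_schemes;
 *	};
 *
 *	static int count_schemes_fn(void *data)
 *	{
 *		struct count_data *d = data;
 *		struct damos *s;
 *
 *		damon_for_each_scheme(s, d->ctx)
 *			d->nr_schemes++;
 *		return 0;
 *	}
 *
 *	static int count_schemes(struct damon_ctx *ctx, int *nr)
 *	{
 *		struct count_data data = { .ctx = ctx };
 *		struct damon_call_control control = {
 *			.fn = count_schemes_fn,
 *			.data = &data,
 *		};
 *		int err = damon_call(ctx, &control);
 *
 *		if (!err)
 *			*nr = data.nr_schemes;
 *		return err;
 *	}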
*/ -static bool damon_check_reset_time_interval(struct timespec64 *baseline, - unsigned long interval) +int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) { - struct timespec64 now; + if (!control->repeat) + init_completion(&control->completion); + control->canceled = false; + INIT_LIST_HEAD(&control->list); - ktime_get_coarse_ts64(&now); - if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) < - interval * 1000) - return false; - *baseline = now; - return true; + mutex_lock(&ctx->call_controls_lock); + list_add_tail(&control->list, &ctx->call_controls); + mutex_unlock(&ctx->call_controls_lock); + if (!damon_is_running(ctx)) + return -EINVAL; + if (control->repeat) + return 0; + wait_for_completion(&control->completion); + if (control->canceled) + return -ECANCELED; + return 0; +} + +/** + * damos_walk() - Invoke a given functions while DAMOS walk regions. + * @ctx: DAMON context to call the functions for. + * @control: Control variable of the walk request. + * + * Ask DAMON worker thread (kdamond) of @ctx to call a function for each region + * that the kdamond will apply DAMOS action to, and wait until the kdamond + * finishes handling of the request. + * + * The kdamond executes the given function in the main loop, for each region + * just after it applied any DAMOS actions of @ctx to it. The invocation is + * made only within one &damos->apply_interval_us since damos_walk() + * invocation, for each scheme. The given callback function can hence safely + * access the internal data of &struct damon_ctx and &struct damon_region that + * each of the scheme will apply the action for next interval, without + * additional synchronizations against the kdamond. If every scheme of @ctx + * passed at least one &damos->apply_interval_us, kdamond marks the request as + * completed so that damos_walk() can wakeup and return. + * + * Return: 0 on success, negative error code otherwise. + */ +int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control) +{ + init_completion(&control->completion); + control->canceled = false; + mutex_lock(&ctx->walk_control_lock); + if (ctx->walk_control) { + mutex_unlock(&ctx->walk_control_lock); + return -EBUSY; + } + ctx->walk_control = control; + mutex_unlock(&ctx->walk_control_lock); + if (!damon_is_running(ctx)) + return -EINVAL; + wait_for_completion(&control->completion); + if (control->canceled) + return -ECANCELED; + return 0; } /* - * Check whether it is time to flush the aggregated information + * Warn and fix corrupted ->nr_accesses[_bp] for investigations and preventing + * the problem being propagated. 
*/ -static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) +static void damon_warn_fix_nr_accesses_corruption(struct damon_region *r) { - return damon_check_reset_time_interval(&ctx->last_aggregation, - ctx->attrs.aggr_interval); + if (r->nr_accesses_bp == r->nr_accesses * 10000) + return; + WARN_ONCE(true, "invalid nr_accesses_bp at reset: %u %u\n", + r->nr_accesses_bp, r->nr_accesses); + r->nr_accesses_bp = r->nr_accesses * 10000; } /* @@ -659,7 +1535,8 @@ static void kdamond_reset_aggregated(struct damon_ctx *c) struct damon_region *r; damon_for_each_region(r, t) { - trace_damon_aggregated(t, ti, r, damon_nr_regions(t)); + trace_damon_aggregated(ti, r, damon_nr_regions(t)); + damon_warn_fix_nr_accesses_corruption(r); r->last_nr_accesses = r->nr_accesses; r->nr_accesses = 0; } @@ -667,18 +1544,80 @@ static void kdamond_reset_aggregated(struct damon_ctx *c) } } +static unsigned long damon_get_intervals_score(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sz_region, max_access_events = 0, access_events = 0; + unsigned long target_access_events; + unsigned long goal_bp = c->attrs.intervals_goal.access_bp; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + sz_region = damon_sz_region(r); + max_access_events += sz_region * c->attrs.aggr_samples; + access_events += sz_region * r->nr_accesses; + } + } + target_access_events = max_access_events * goal_bp / 10000; + target_access_events = target_access_events ? : 1; + return access_events * 10000 / target_access_events; +} + +static unsigned long damon_feed_loop_next_input(unsigned long last_input, + unsigned long score); + +static unsigned long damon_get_intervals_adaptation_bp(struct damon_ctx *c) +{ + unsigned long score_bp, adaptation_bp; + + score_bp = damon_get_intervals_score(c); + adaptation_bp = damon_feed_loop_next_input(100000000, score_bp) / + 10000; + /* + * adaptaion_bp ranges from 1 to 20,000. Avoid too rapid reduction of + * the intervals by rescaling [1,10,000] to [5000, 10,000]. 
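 *
 * As a numeric illustration (values picked arbitrarily): if the feedback
 * loop returns adaptation_bp == 2000 because accesses are observed far more
 * often than the goal, the rescaling below turns it into
 * 5000 + 2000 / 2 == 6000, so the sampling interval is reduced to 60% of
 * its current value in this tuning step rather than to 20% at once; values
 * above 10000 (interval growth) are used as they are.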
+ */ + if (adaptation_bp <= 10000) + adaptation_bp = 5000 + adaptation_bp / 2; + return adaptation_bp; +} + +static void kdamond_tune_intervals(struct damon_ctx *c) +{ + unsigned long adaptation_bp; + struct damon_attrs new_attrs; + struct damon_intervals_goal *goal; + + adaptation_bp = damon_get_intervals_adaptation_bp(c); + if (adaptation_bp == 10000) + return; + + new_attrs = c->attrs; + goal = &c->attrs.intervals_goal; + new_attrs.sample_interval = min(goal->max_sample_us, + c->attrs.sample_interval * adaptation_bp / 10000); + new_attrs.sample_interval = max(goal->min_sample_us, + new_attrs.sample_interval); + new_attrs.aggr_interval = new_attrs.sample_interval * + c->attrs.aggr_samples; + trace_damon_monitor_intervals_tune(new_attrs.sample_interval); + damon_set_attrs(c, &new_attrs); +} + static void damon_split_region_at(struct damon_target *t, struct damon_region *r, unsigned long sz_r); static bool __damos_valid_target(struct damon_region *r, struct damos *s) { unsigned long sz; + unsigned int nr_accesses = r->nr_accesses_bp / 10000; sz = damon_sz_region(r); return s->pattern.min_sz_region <= sz && sz <= s->pattern.max_sz_region && - s->pattern.min_nr_accesses <= r->nr_accesses && - r->nr_accesses <= s->pattern.max_nr_accesses && + s->pattern.min_nr_accesses <= nr_accesses && + nr_accesses <= s->pattern.max_nr_accesses && s->pattern.min_age_region <= r->age && r->age <= s->pattern.max_age_region; } @@ -700,6 +1639,7 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, * @t: The target of the region. * @rp: The pointer to the region. * @s: The scheme to be applied. + * @min_sz_region: minimum region size. * * If a quota of a scheme has exceeded in a quota charge window, the scheme's * action would applied to only a part of the target access pattern fulfilling @@ -717,7 +1657,7 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, * Return: true if the region should be entirely skipped, false otherwise. 
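 *
 * As a concrete illustration (addresses are made up): with
 * quota->charge_addr_from == 0x3000, a region spanning [0x1000, 0x8000) and
 * a minimum region size of 0x1000, ALIGN_DOWN(0x3000 - 0x1000, 0x1000) ==
 * 0x2000 bytes are split off into their own region and skipped, so that
 * applying the action resumes from 0x3000.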
*/ static bool damos_skip_charged_region(struct damon_target *t, - struct damon_region **rp, struct damos *s) + struct damon_region **rp, struct damos *s, unsigned long min_sz_region) { struct damon_region *r = *rp; struct damos_quota *quota = &s->quota; @@ -739,11 +1679,11 @@ static bool damos_skip_charged_region(struct damon_target *t, if (quota->charge_addr_from && r->ar.start < quota->charge_addr_from) { sz_to_skip = ALIGN_DOWN(quota->charge_addr_from - - r->ar.start, DAMON_MIN_REGION); + r->ar.start, min_sz_region); if (!sz_to_skip) { - if (damon_sz_region(r) <= DAMON_MIN_REGION) + if (damon_sz_region(r) <= min_sz_region) return true; - sz_to_skip = DAMON_MIN_REGION; + sz_to_skip = min_sz_region; } damon_split_region_at(t, r, sz_to_skip); r = damon_next_region(r); @@ -756,13 +1696,167 @@ static bool damos_skip_charged_region(struct damon_target *t, } static void damos_update_stat(struct damos *s, - unsigned long sz_tried, unsigned long sz_applied) + unsigned long sz_tried, unsigned long sz_applied, + unsigned long sz_ops_filter_passed) { s->stat.nr_tried++; s->stat.sz_tried += sz_tried; if (sz_applied) s->stat.nr_applied++; s->stat.sz_applied += sz_applied; + s->stat.sz_ops_filter_passed += sz_ops_filter_passed; +} + +static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos_filter *filter, + unsigned long min_sz_region) +{ + bool matched = false; + struct damon_target *ti; + int target_idx = 0; + unsigned long start, end; + + switch (filter->type) { + case DAMOS_FILTER_TYPE_TARGET: + damon_for_each_target(ti, ctx) { + if (ti == t) + break; + target_idx++; + } + matched = target_idx == filter->target_idx; + break; + case DAMOS_FILTER_TYPE_ADDR: + start = ALIGN_DOWN(filter->addr_range.start, min_sz_region); + end = ALIGN_DOWN(filter->addr_range.end, min_sz_region); + + /* inside the range */ + if (start <= r->ar.start && r->ar.end <= end) { + matched = true; + break; + } + /* outside of the range */ + if (r->ar.end <= start || end <= r->ar.start) { + matched = false; + break; + } + /* start before the range and overlap */ + if (r->ar.start < start) { + damon_split_region_at(t, r, start - r->ar.start); + matched = false; + break; + } + /* start inside the range */ + damon_split_region_at(t, r, end - r->ar.start); + matched = true; + break; + default: + return false; + } + + return matched == filter->matching; +} + +static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + struct damos_filter *filter; + + s->core_filters_allowed = false; + damos_for_each_core_filter(filter, s) { + if (damos_filter_match(ctx, t, r, filter, ctx->min_sz_region)) { + if (filter->allow) + s->core_filters_allowed = true; + return !filter->allow; + } + } + return s->core_filters_default_reject; +} + +/* + * damos_walk_call_walk() - Call &damos_walk_control->walk_fn. + * @ctx: The context of &damon_ctx->walk_control. + * @t: The monitoring target of @r that @s will be applied. + * @r: The region of @t that @s will be applied. + * @s: The scheme of @ctx that will be applied to @r. + * + * This function is called from kdamond whenever it asked the operation set to + * apply a DAMOS scheme action to a region. If a DAMOS walk request is + * installed by damos_walk() and not yet uninstalled, invoke it. 
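 *
 * For reference, a walker that damos_walk() callers install via
 * &damos_walk_control->walk_fn looks roughly like the sketch below
 * (illustrative only; the accumulator passed via ->data is a placeholder):
 *
 *	static void sum_sz_walk_fn(void *data, struct damon_ctx *ctx,
 *			struct damon_target *t, struct damon_region *r,
 *			struct damos *s, unsigned long sz_filter_passed)
 *	{
 *		*(unsigned long *)data += damon_sz_region(r);
 *	}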
+ */ +static void damos_walk_call_walk(struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos *s, + unsigned long sz_filter_passed) +{ + struct damos_walk_control *control; + + if (s->walk_completed) + return; + + control = ctx->walk_control; + if (!control) + return; + + control->walk_fn(control->data, ctx, t, r, s, sz_filter_passed); +} + +/* + * damos_walk_complete() - Complete DAMOS walk request if all walks are done. + * @ctx: The context of &damon_ctx->walk_control. + * @s: A scheme of @ctx that all walks are now done. + * + * This function is called when kdamond finished applying the action of a DAMOS + * scheme to all regions that eligible for the given &damos->apply_interval_us. + * If every scheme of @ctx including @s now finished walking for at least one + * &damos->apply_interval_us, this function makrs the handling of the given + * DAMOS walk request is done, so that damos_walk() can wake up and return. + */ +static void damos_walk_complete(struct damon_ctx *ctx, struct damos *s) +{ + struct damos *siter; + struct damos_walk_control *control; + + control = ctx->walk_control; + if (!control) + return; + + s->walk_completed = true; + /* if all schemes completed, signal completion to walker */ + damon_for_each_scheme(siter, ctx) { + if (!siter->walk_completed) + return; + } + damon_for_each_scheme(siter, ctx) + siter->walk_completed = false; + + complete(&control->completion); + ctx->walk_control = NULL; +} + +/* + * damos_walk_cancel() - Cancel the current DAMOS walk request. + * @ctx: The context of &damon_ctx->walk_control. + * + * This function is called when @ctx is deactivated by DAMOS watermarks, DAMOS + * walk is requested but there is no DAMOS scheme to walk for, or the kdamond + * is already out of the main loop and therefore gonna be terminated, and hence + * cannot continue the walks. This function therefore marks the walk request + * as canceled, so that damos_walk() can wake up and return. + */ +static void damos_walk_cancel(struct damon_ctx *ctx) +{ + struct damos_walk_control *control; + + mutex_lock(&ctx->walk_control_lock); + control = ctx->walk_control; + mutex_unlock(&ctx->walk_control_lock); + + if (!control) + return; + control->canceled = true; + complete(&control->completion); + mutex_lock(&ctx->walk_control_lock); + ctx->walk_control = NULL; + mutex_unlock(&ctx->walk_control_lock); } static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, @@ -772,21 +1866,51 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, unsigned long sz = damon_sz_region(r); struct timespec64 begin, end; unsigned long sz_applied = 0; - int err = 0; + unsigned long sz_ops_filter_passed = 0; + /* + * We plan to support multiple context per kdamond, as DAMON sysfs + * implies with 'nr_contexts' file. Nevertheless, only single context + * per kdamond is supported for now. So, we can simply use '0' context + * index here. 
+ */ + unsigned int cidx = 0; + struct damos *siter; /* schemes iterator */ + unsigned int sidx = 0; + struct damon_target *titer; /* targets iterator */ + unsigned int tidx = 0; + bool do_trace = false; + + /* get indices for trace_damos_before_apply() */ + if (trace_damos_before_apply_enabled()) { + damon_for_each_scheme(siter, c) { + if (siter == s) + break; + sidx++; + } + damon_for_each_target(titer, c) { + if (titer == t) + break; + tidx++; + } + do_trace = true; + } if (c->ops.apply_scheme) { if (quota->esz && quota->charged_sz + sz > quota->esz) { sz = ALIGN_DOWN(quota->esz - quota->charged_sz, - DAMON_MIN_REGION); + c->min_sz_region); if (!sz) goto update_stat; damon_split_region_at(t, r, sz); } + if (damos_filter_out(c, t, r, s)) + return; ktime_get_coarse_ts64(&begin); - if (c->callback.before_damos_apply) - err = c->callback.before_damos_apply(c, t, r, s); - if (!err) - sz_applied = c->ops.apply_scheme(c, t, r, s); + trace_damos_before_apply(cidx, sidx, tidx, r, + damon_nr_regions(t), do_trace); + sz_applied = c->ops.apply_scheme(c, t, r, s, + &sz_ops_filter_passed); + damos_walk_call_walk(c, t, r, s, sz_ops_filter_passed); ktime_get_coarse_ts64(&end); quota->total_charged_ns += timespec64_to_ns(&end) - timespec64_to_ns(&begin); @@ -800,7 +1924,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, r->age = 0; update_stat: - damos_update_stat(s, sz, sz_applied); + damos_update_stat(s, sz, sz_applied, sz_ops_filter_passed); } static void damon_do_apply_schemes(struct damon_ctx *c, @@ -812,6 +1936,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c, damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; + if (c->passed_sample_intervals < s->next_apply_sis) + continue; + if (!s->wmarks.activated) continue; @@ -819,7 +1946,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c, if (quota->esz && quota->charged_sz >= quota->esz) continue; - if (damos_skip_charged_region(t, &r, s)) + if (damos_skip_charged_region(t, &r, s, c->min_sz_region)) continue; if (!damos_valid_target(c, t, r, s)) @@ -829,40 +1956,247 @@ static void damon_do_apply_schemes(struct damon_ctx *c, } } -/* Shouldn't be called if quota->ms and quota->sz are zero */ +/* + * damon_feed_loop_next_input() - get next input to achieve a target score. + * @last_input The last input. + * @score Current score that made with @last_input. + * + * Calculate next input to achieve the target score, based on the last input + * and current score. Assuming the input and the score are positively + * proportional, calculate how much compensation should be added to or + * subtracted from the last input as a proportion of the last input. Avoid + * next input always being zero by setting it non-zero always. In short form + * (assuming support of float and signed calculations), the algorithm is as + * below. + * + * next_input = max(last_input * ((goal - current) / goal + 1), 1) + * + * For simple implementation, we assume the target score is always 10,000. The + * caller should adjust @score for this. + * + * Returns next input that assumed to achieve the target score. 
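 *
 * A quick numeric illustration (numbers chosen arbitrarily): with
 * last_input == 1,000,000 and score == 5,000 (half of the 10,000 goal), the
 * compensation is 1,000,000 * 5,000 / 10,000 == 500,000 and the next input
 * becomes 1,500,000.  With score == 15,000, which over-achieves the goal by
 * the same amount, the next input becomes 500,000 instead.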
+ */ +static unsigned long damon_feed_loop_next_input(unsigned long last_input, + unsigned long score) +{ + const unsigned long goal = 10000; + /* Set minimum input as 10000 to avoid compensation be zero */ + const unsigned long min_input = 10000; + unsigned long score_goal_diff, compensation; + bool over_achieving = score > goal; + + if (score == goal) + return last_input; + if (score >= goal * 2) + return min_input; + + if (over_achieving) + score_goal_diff = score - goal; + else + score_goal_diff = goal - score; + + if (last_input < ULONG_MAX / score_goal_diff) + compensation = last_input * score_goal_diff / goal; + else + compensation = last_input / goal * score_goal_diff; + + if (over_achieving) + return max(last_input - compensation, min_input); + if (last_input < ULONG_MAX - compensation) + return last_input + compensation; + return ULONG_MAX; +} + +#ifdef CONFIG_PSI + +static u64 damos_get_some_mem_psi_total(void) +{ + if (static_branch_likely(&psi_disabled)) + return 0; + return div_u64(psi_system.total[PSI_AVGS][PSI_MEM * 2], + NSEC_PER_USEC); +} + +#else /* CONFIG_PSI */ + +static inline u64 damos_get_some_mem_psi_total(void) +{ + return 0; +}; + +#endif /* CONFIG_PSI */ + +#ifdef CONFIG_NUMA +static __kernel_ulong_t damos_get_node_mem_bp( + struct damos_quota_goal *goal) +{ + struct sysinfo i; + __kernel_ulong_t numerator; + + si_meminfo_node(&i, goal->nid); + if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP) + numerator = i.totalram - i.freeram; + else /* DAMOS_QUOTA_NODE_MEM_FREE_BP */ + numerator = i.freeram; + return numerator * 10000 / i.totalram; +} + +static unsigned long damos_get_node_memcg_used_bp( + struct damos_quota_goal *goal) +{ + struct mem_cgroup *memcg; + struct lruvec *lruvec; + unsigned long used_pages, numerator; + struct sysinfo i; + + rcu_read_lock(); + memcg = mem_cgroup_from_id(goal->memcg_id); + rcu_read_unlock(); + if (!memcg) { + if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) + return 0; + else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */ + return 10000; + } + mem_cgroup_flush_stats(memcg); + lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(goal->nid)); + used_pages = lruvec_page_state(lruvec, NR_ACTIVE_ANON); + used_pages += lruvec_page_state(lruvec, NR_INACTIVE_ANON); + used_pages += lruvec_page_state(lruvec, NR_ACTIVE_FILE); + used_pages += lruvec_page_state(lruvec, NR_INACTIVE_FILE); + + si_meminfo_node(&i, goal->nid); + if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) + numerator = used_pages; + else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */ + numerator = i.totalram - used_pages; + return numerator * 10000 / i.totalram; +} +#else +static __kernel_ulong_t damos_get_node_mem_bp( + struct damos_quota_goal *goal) +{ + return 0; +} + +static unsigned long damos_get_node_memcg_used_bp( + struct damos_quota_goal *goal) +{ + return 0; +} +#endif + + +static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal) +{ + u64 now_psi_total; + + switch (goal->metric) { + case DAMOS_QUOTA_USER_INPUT: + /* User should already set goal->current_value */ + break; + case DAMOS_QUOTA_SOME_MEM_PSI_US: + now_psi_total = damos_get_some_mem_psi_total(); + goal->current_value = now_psi_total - goal->last_psi_total; + goal->last_psi_total = now_psi_total; + break; + case DAMOS_QUOTA_NODE_MEM_USED_BP: + case DAMOS_QUOTA_NODE_MEM_FREE_BP: + goal->current_value = damos_get_node_mem_bp(goal); + break; + case DAMOS_QUOTA_NODE_MEMCG_USED_BP: + case DAMOS_QUOTA_NODE_MEMCG_FREE_BP: + goal->current_value = damos_get_node_memcg_used_bp(goal); + break; + default: + break; + } 
+} + +/* Return the highest score since it makes schemes least aggressive */ +static unsigned long damos_quota_score(struct damos_quota *quota) +{ + struct damos_quota_goal *goal; + unsigned long highest_score = 0; + + damos_for_each_quota_goal(goal, quota) { + damos_set_quota_goal_current_value(goal); + highest_score = max(highest_score, + goal->current_value * 10000 / + goal->target_value); + } + + return highest_score; +} + +/* + * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty + */ static void damos_set_effective_quota(struct damos_quota *quota) { unsigned long throughput; - unsigned long esz; + unsigned long esz = ULONG_MAX; - if (!quota->ms) { + if (!quota->ms && list_empty("a->goals)) { quota->esz = quota->sz; return; } - if (quota->total_charged_ns) - throughput = quota->total_charged_sz * 1000000 / - quota->total_charged_ns; - else - throughput = PAGE_SIZE * 1024; - esz = throughput * quota->ms; + if (!list_empty("a->goals)) { + unsigned long score = damos_quota_score(quota); + + quota->esz_bp = damon_feed_loop_next_input( + max(quota->esz_bp, 10000UL), + score); + esz = quota->esz_bp / 10000; + } + + if (quota->ms) { + if (quota->total_charged_ns) + throughput = mult_frac(quota->total_charged_sz, 1000000, + quota->total_charged_ns); + else + throughput = PAGE_SIZE * 1024; + esz = min(throughput * quota->ms, esz); + } if (quota->sz && quota->sz < esz) esz = quota->sz; + quota->esz = esz; } +static void damos_trace_esz(struct damon_ctx *c, struct damos *s, + struct damos_quota *quota) +{ + unsigned int cidx = 0, sidx = 0; + struct damos *siter; + + damon_for_each_scheme(siter, c) { + if (siter == s) + break; + sidx++; + } + trace_damos_esz(cidx, sidx, quota->esz); +} + static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) { struct damos_quota *quota = &s->quota; struct damon_target *t; struct damon_region *r; - unsigned long cumulated_sz; + unsigned long cumulated_sz, cached_esz; unsigned int score, max_score = 0; - if (!quota->ms && !quota->sz) + if (!quota->ms && !quota->sz && list_empty("a->goals)) return; + /* First charge window */ + if (!quota->total_charged_sz && !quota->charged_from) { + quota->charged_from = jiffies; + damos_set_effective_quota(quota); + } + /* New charge window starts */ if (time_after_eq(jiffies, quota->charged_from + msecs_to_jiffies(quota->reset_interval))) { @@ -871,20 +2205,27 @@ static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) quota->total_charged_sz += quota->charged_sz; quota->charged_from = jiffies; quota->charged_sz = 0; + if (trace_damos_esz_enabled()) + cached_esz = quota->esz; damos_set_effective_quota(quota); + if (trace_damos_esz_enabled() && quota->esz != cached_esz) + damos_trace_esz(c, s, quota); } if (!c->ops.get_scheme_score) return; /* Fill up the score histogram */ - memset(quota->histogram, 0, sizeof(quota->histogram)); + memset(c->regions_score_histogram, 0, + sizeof(*c->regions_score_histogram) * + (DAMOS_MAX_SCORE + 1)); damon_for_each_target(t, c) { damon_for_each_region(r, t) { if (!__damos_valid_target(r, s)) continue; score = c->ops.get_scheme_score(c, t, r, s); - quota->histogram[score] += damon_sz_region(r); + c->regions_score_histogram[score] += + damon_sz_region(r); if (score > max_score) max_score = score; } @@ -892,7 +2233,7 @@ static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) /* Set the min score limit */ for (cumulated_sz = 0, score = max_score; ; score--) { - cumulated_sz += quota->histogram[score]; + cumulated_sz += 
c->regions_score_histogram[score]; if (cumulated_sz >= quota->esz || !score) break; } @@ -904,18 +2245,41 @@ static void kdamond_apply_schemes(struct damon_ctx *c) struct damon_target *t; struct damon_region *r, *next_r; struct damos *s; + unsigned long sample_interval = c->attrs.sample_interval ? + c->attrs.sample_interval : 1; + bool has_schemes_to_apply = false; damon_for_each_scheme(s, c) { + if (c->passed_sample_intervals < s->next_apply_sis) + continue; + if (!s->wmarks.activated) continue; + has_schemes_to_apply = true; + damos_adjust_quota(c, s); } + if (!has_schemes_to_apply) + return; + + mutex_lock(&c->walk_control_lock); damon_for_each_target(t, c) { damon_for_each_region_safe(r, next_r, t) damon_do_apply_schemes(c, t, r); } + + damon_for_each_scheme(s, c) { + if (c->passed_sample_intervals < s->next_apply_sis) + continue; + damos_walk_complete(c, s); + s->next_apply_sis = c->passed_sample_intervals + + (s->apply_interval_us ? s->apply_interval_us : + c->attrs.aggr_interval) / sample_interval; + s->last_applied = NULL; + } + mutex_unlock(&c->walk_control_lock); } /* @@ -928,6 +2292,7 @@ static void damon_merge_two_regions(struct damon_target *t, l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / (sz_l + sz_r); + l->nr_accesses_bp = l->nr_accesses * 10000; l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); l->ar.end = r->ar.end; damon_destroy_region(r, t); @@ -948,6 +2313,8 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, damon_for_each_region_safe(r, next, t) { if (abs(r->nr_accesses - r->last_nr_accesses) > thres) r->age = 0; + else if ((r->nr_accesses == 0) != (r->last_nr_accesses == 0)) + r->age = 0; else r->age++; @@ -970,14 +2337,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, * access frequencies are similar. This is for minimizing the monitoring * overhead under the dynamically changeable access pattern. If a merge was * unnecessarily made, later 'kdamond_split_regions()' will revert it. + * + * The total number of regions could be higher than the user-defined limit, + * max_nr_regions for some cases. For example, the user can update + * max_nr_regions to a number that lower than the current number of regions + * while DAMON is running. For such a case, repeat merging until the limit is + * met while increasing @threshold up to possible maximum level. */ static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, unsigned long sz_limit) { struct damon_target *t; - - damon_for_each_target(t, c) - damon_merge_regions_of(t, threshold, sz_limit); + unsigned int nr_regions; + unsigned int max_thres; + + max_thres = c->attrs.aggr_interval / + (c->attrs.sample_interval ? 
c->attrs.sample_interval : 1); + do { + nr_regions = 0; + damon_for_each_target(t, c) { + damon_merge_regions_of(t, threshold, sz_limit); + nr_regions += damon_nr_regions(t); + } + threshold = max(1, threshold * 2); + } while (nr_regions > c->attrs.max_nr_regions && + threshold / 2 < max_thres); } /* @@ -999,12 +2383,15 @@ static void damon_split_region_at(struct damon_target *t, new->age = r->age; new->last_nr_accesses = r->last_nr_accesses; + new->nr_accesses_bp = r->nr_accesses_bp; + new->nr_accesses = r->nr_accesses; damon_insert_region(new, r, damon_next_region(r), t); } /* Split every region in the given target into 'nr_subs' regions */ -static void damon_split_regions_of(struct damon_target *t, int nr_subs) +static void damon_split_regions_of(struct damon_target *t, int nr_subs, + unsigned long min_sz_region) { struct damon_region *r, *next; unsigned long sz_region, sz_sub = 0; @@ -1014,13 +2401,13 @@ static void damon_split_regions_of(struct damon_target *t, int nr_subs) sz_region = damon_sz_region(r); for (i = 0; i < nr_subs - 1 && - sz_region > 2 * DAMON_MIN_REGION; i++) { + sz_region > 2 * min_sz_region; i++) { /* * Randomly select size of left sub-region to be at * least 10 percent and at most 90% of original region */ sz_sub = ALIGN_DOWN(damon_rand(1, 10) * - sz_region / 10, DAMON_MIN_REGION); + sz_region / 10, min_sz_region); /* Do not allow blank region */ if (sz_sub == 0 || sz_sub >= sz_region) continue; @@ -1060,24 +2447,12 @@ static void kdamond_split_regions(struct damon_ctx *ctx) nr_subregions = 3; damon_for_each_target(t, ctx) - damon_split_regions_of(t, nr_subregions); + damon_split_regions_of(t, nr_subregions, ctx->min_sz_region); last_nr_regions = nr_regions; } /* - * Check whether it is time to check and apply the operations-related data - * structures. - * - * Returns true if it is. - */ -static bool kdamond_need_update_operations(struct damon_ctx *ctx) -{ - return damon_check_reset_time_interval(&ctx->last_ops_update, - ctx->attrs.ops_update_interval); -} - -/* * Check whether current monitoring should be stopped * * The monitoring is stopped when either the user requested to stop, or all @@ -1103,14 +2478,14 @@ static bool kdamond_need_stop(struct damon_ctx *ctx) return true; } -static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) +static int damos_get_wmark_metric_value(enum damos_wmark_metric metric, + unsigned long *metric_value) { - struct sysinfo i; - switch (metric) { case DAMOS_WMARK_FREE_MEM_RATE: - si_meminfo(&i); - return i.freeram * 1000 / i.totalram; + *metric_value = global_zone_page_state(NR_FREE_PAGES) * 1000 / + totalram_pages(); + return 0; default: break; } @@ -1125,17 +2500,15 @@ static unsigned long damos_wmark_wait_us(struct damos *scheme) { unsigned long metric; - if (scheme->wmarks.metric == DAMOS_WMARK_NONE) + if (damos_get_wmark_metric_value(scheme->wmarks.metric, &metric)) return 0; - metric = damos_wmark_metric_value(scheme->wmarks.metric); /* higher than high watermark or lower than low watermark */ if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { if (scheme->wmarks.activated) pr_debug("deactivate a scheme (%d) for %s wmark\n", - scheme->action, - metric > scheme->wmarks.high ? 
- "high" : "low"); + scheme->action, + str_high_low(metric > scheme->wmarks.high)); scheme->wmarks.activated = false; return scheme->wmarks.interval; } @@ -1153,11 +2526,60 @@ static unsigned long damos_wmark_wait_us(struct damos *scheme) static void kdamond_usleep(unsigned long usecs) { - /* See Documentation/timers/timers-howto.rst for the thresholds */ - if (usecs > 20 * USEC_PER_MSEC) + if (usecs >= USLEEP_RANGE_UPPER_BOUND) schedule_timeout_idle(usecs_to_jiffies(usecs)); else - usleep_idle_range(usecs, usecs + 1); + usleep_range_idle(usecs, usecs + 1); +} + +/* + * kdamond_call() - handle damon_call_control objects. + * @ctx: The &struct damon_ctx of the kdamond. + * @cancel: Whether to cancel the invocation of the function. + * + * If there are &struct damon_call_control requests that registered via + * &damon_call() on @ctx, do or cancel the invocation of the function depending + * on @cancel. @cancel is set when the kdamond is already out of the main loop + * and therefore will be terminated. + */ +static void kdamond_call(struct damon_ctx *ctx, bool cancel) +{ + struct damon_call_control *control; + LIST_HEAD(repeat_controls); + int ret = 0; + + while (true) { + mutex_lock(&ctx->call_controls_lock); + control = list_first_entry_or_null(&ctx->call_controls, + struct damon_call_control, list); + mutex_unlock(&ctx->call_controls_lock); + if (!control) + break; + if (cancel) { + control->canceled = true; + } else { + ret = control->fn(control->data); + control->return_code = ret; + } + mutex_lock(&ctx->call_controls_lock); + list_del(&control->list); + mutex_unlock(&ctx->call_controls_lock); + if (!control->repeat) { + complete(&control->completion); + } else if (control->canceled && control->dealloc_on_cancel) { + kfree(control); + continue; + } else { + list_add(&control->list, &repeat_controls); + } + } + control = list_first_entry_or_null(&repeat_controls, + struct damon_call_control, list); + if (!control || cancel) + return; + mutex_lock(&ctx->call_controls_lock); + list_add_tail(&control->list, &ctx->call_controls); + mutex_unlock(&ctx->call_controls_lock); } /* Returns negative error code if it's not activated but should return */ @@ -1181,13 +2603,34 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) kdamond_usleep(min_wait_time); - if (ctx->callback.after_wmarks_check && - ctx->callback.after_wmarks_check(ctx)) - break; + kdamond_call(ctx, false); + damos_walk_cancel(ctx); } return -EBUSY; } +static void kdamond_init_ctx(struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + unsigned long apply_interval; + struct damos *scheme; + + ctx->passed_sample_intervals = 0; + ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->attrs.ops_update_interval / + sample_interval; + ctx->next_intervals_tune_sis = ctx->next_aggregation_sis * + ctx->attrs.intervals_goal.aggrs; + + damon_for_each_scheme(scheme, ctx) { + apply_interval = scheme->apply_interval_us ? 
+ scheme->apply_interval_us : ctx->attrs.aggr_interval; + scheme->next_apply_sis = apply_interval / sample_interval; + damos_set_filters_default_reject(scheme); + } +} + /* * The monitoring daemon that runs as a kernel thread */ @@ -1201,43 +2644,99 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); + complete(&ctx->kdamond_started); + kdamond_init_ctx(ctx); + if (ctx->ops.init) ctx->ops.init(ctx); - if (ctx->callback.before_start && ctx->callback.before_start(ctx)) + ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1, + sizeof(*ctx->regions_score_histogram), GFP_KERNEL); + if (!ctx->regions_score_histogram) goto done; sz_limit = damon_region_sz_limit(ctx); while (!kdamond_need_stop(ctx)) { + /* + * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could + * be changed from kdamond_call(). Read the values here, and + * use those for this iteration. That is, damon_set_attrs() + * updated new values are respected from next iteration. + */ + unsigned long next_aggregation_sis = ctx->next_aggregation_sis; + unsigned long next_ops_update_sis = ctx->next_ops_update_sis; + unsigned long sample_interval = ctx->attrs.sample_interval; + if (kdamond_wait_activation(ctx)) break; if (ctx->ops.prepare_access_checks) ctx->ops.prepare_access_checks(ctx); - if (ctx->callback.after_sampling && - ctx->callback.after_sampling(ctx)) - break; - kdamond_usleep(ctx->attrs.sample_interval); + kdamond_usleep(sample_interval); + ctx->passed_sample_intervals++; if (ctx->ops.check_accesses) max_nr_accesses = ctx->ops.check_accesses(ctx); - if (kdamond_aggregate_interval_passed(ctx)) { + if (ctx->passed_sample_intervals >= next_aggregation_sis) kdamond_merge_regions(ctx, max_nr_accesses / 10, sz_limit); - if (ctx->callback.after_aggregation && - ctx->callback.after_aggregation(ctx)) - break; + + /* + * do kdamond_call() and kdamond_apply_schemes() after + * kdamond_merge_regions() if possible, to reduce overhead + */ + kdamond_call(ctx, false); + if (!list_empty(&ctx->schemes)) kdamond_apply_schemes(ctx); + else + damos_walk_cancel(ctx); + + sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + if (ctx->passed_sample_intervals >= next_aggregation_sis) { + if (ctx->attrs.intervals_goal.aggrs && + ctx->passed_sample_intervals >= + ctx->next_intervals_tune_sis) { + /* + * ctx->next_aggregation_sis might be updated + * from kdamond_call(). In the case, + * damon_set_attrs() which will be called from + * kdamond_tune_interval() may wrongly think + * this is in the middle of the current + * aggregation, and make aggregation + * information reset for all regions. Then, + * following kdamond_reset_aggregated() call + * will make the region information invalid, + * particularly for ->nr_accesses_bp. + * + * Reset ->next_aggregation_sis to avoid that. + * It will anyway correctly updated after this + * if caluse. + */ + ctx->next_aggregation_sis = + next_aggregation_sis; + ctx->next_intervals_tune_sis += + ctx->attrs.aggr_samples * + ctx->attrs.intervals_goal.aggrs; + kdamond_tune_intervals(ctx); + sample_interval = ctx->attrs.sample_interval ? 
+ ctx->attrs.sample_interval : 1; + + } + ctx->next_aggregation_sis = next_aggregation_sis + + ctx->attrs.aggr_interval / sample_interval; + kdamond_reset_aggregated(ctx); kdamond_split_regions(ctx); - if (ctx->ops.reset_aggregated) - ctx->ops.reset_aggregated(ctx); } - if (kdamond_need_update_operations(ctx)) { + if (ctx->passed_sample_intervals >= next_ops_update_sis) { + ctx->next_ops_update_sis = next_ops_update_sis + + ctx->attrs.ops_update_interval / + sample_interval; if (ctx->ops.update) ctx->ops.update(ctx); sz_limit = damon_region_sz_limit(ctx); @@ -1249,22 +2748,25 @@ done: damon_destroy_region(r, t); } - if (ctx->callback.before_terminate) - ctx->callback.before_terminate(ctx); if (ctx->ops.cleanup) ctx->ops.cleanup(ctx); + kfree(ctx->regions_score_histogram); pr_debug("kdamond (%d) finishes\n", current->pid); mutex_lock(&ctx->kdamond_lock); ctx->kdamond = NULL; mutex_unlock(&ctx->kdamond_lock); + kdamond_call(ctx, true); + damos_walk_cancel(ctx); + mutex_lock(&damon_lock); nr_running_ctxs--; if (!nr_running_ctxs && running_exclusive_ctxs) running_exclusive_ctxs = false; mutex_unlock(&damon_lock); + damon_destroy_targets(ctx); return 0; } @@ -1315,6 +2817,7 @@ static bool damon_find_biggest_system_ram(unsigned long *start, * @t: The monitoring target to set the region. * @start: The pointer to the start address of the region. * @end: The pointer to the end address of the region. + * @min_sz_region: Minimum region size. * * This function sets the region of @t as requested by @start and @end. If the * values of @start and @end are zero, however, this function finds the biggest @@ -1325,7 +2828,8 @@ static bool damon_find_biggest_system_ram(unsigned long *start, * Return: 0 on success, negative error code otherwise. */ int damon_set_region_biggest_system_ram_default(struct damon_target *t, - unsigned long *start, unsigned long *end) + unsigned long *start, unsigned long *end, + unsigned long min_sz_region) { struct damon_addr_range addr_range; @@ -1338,7 +2842,87 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t, addr_range.start = *start; addr_range.end = *end; - return damon_set_regions(t, &addr_range, 1); + return damon_set_regions(t, &addr_range, 1, min_sz_region); +} + +/* + * damon_moving_sum() - Calculate an inferred moving sum value. + * @mvsum: Inferred sum of the last @len_window values. + * @nomvsum: Non-moving sum of the last discrete @len_window window values. + * @len_window: The number of last values to take care of. + * @new_value: New value that will be added to the pseudo moving sum. + * + * Moving sum (moving average * window size) is good for handling noise, but + * the cost of keeping past values can be high for arbitrary window size. This + * function implements a lightweight pseudo moving sum function that doesn't + * keep the past window values. + * + * It simply assumes there was no noise in the past, and gets the no-noise + * assumed past value to drop from @nomvsum and @len_window. @nomvsum is a + * non-moving sum of the last window. For example, if @len_window is 10 and we + * have 25 values, @nomvsum is the sum of the 11th to 20th values of the 25 + * values. Hence, this function simply drops @nomvsum / @len_window from the + * given @mvsum and adds @new_value. + * + * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values of + * the last window could vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20. For + * calculating the next moving sum with a new value, we should drop 0 from 50 and + * add the new value. 
However, this function assumes it got value 5 for each + * of the last ten times. Based on the assumption, when the next value is + * measured, it drops the assumed past value, 5, from the current sum, and adds + * the new value to get the updated pseudo-moving sum. + * + * This means the value could have errors, but the errors disappear + * at every @len_window-aligned call. For example, if @len_window is 10, the + * pseudo moving sum with the 11th to 19th values would have an error. But + * the sum with the 20th value will not have the error. + * + * Return: Pseudo-moving sum after getting the @new_value. + */ +static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, + unsigned int len_window, unsigned int new_value) +{ + return mvsum - nomvsum / len_window + new_value; +} + +/** + * damon_update_region_access_rate() - Update the access rate of a region. + * @r: The DAMON region to update for its access check result. + * @accessed: Whether the region was accessed during the last sampling interval. + * @attrs: The damon_attrs of the DAMON context. + * + * Update the access rate of a region with the region's last sampling interval + * access check result. + * + * Usually this will be called by the &damon_operations->check_accesses callback. + */ +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs) +{ + unsigned int len_window = 1; + + /* + * sample_interval can be zero, but cannot be larger than + * aggr_interval, owing to validation of damon_set_attrs(). + */ + if (attrs->sample_interval) + len_window = damon_max_nr_accesses(attrs); + r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp, + r->last_nr_accesses * 10000, len_window, + accessed ? 10000 : 0); + + if (accessed) + r->nr_accesses++; +} + +/** + * damon_initialized() - Return if DAMON is ready to be used. + * + * Return: true if DAMON is ready to be used, false otherwise. + */ +bool damon_initialized(void) +{ + return damon_region_cache != NULL; } static int __init damon_init(void) @@ -1354,4 +2938,4 @@ static int __init damon_init(void) subsys_initcall(damon_init); -#include "core-test.h" +#include "tests/core-kunit.h"
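Editor's note on the quota changes above: the stand-alone, user-space sketch below only illustrates how the reworked damos_set_effective_quota() combines its three limits (the goal-driven esz_bp feedback state, the time quota converted through observed throughput, and the raw size quota). The struct, the numbers, and feedback_next_input() are made up for illustration; in particular, feedback_next_input() is a simplified stand-in for the kernel's damon_feed_loop_next_input(), not its actual algorithm.

/* Illustration only; assumes a 64-bit-like environment, hence unsigned long long. */
#include <limits.h>
#include <stdio.h>

struct quota_sketch {
	unsigned long long ms, sz;			/* user-set time/size limits */
	unsigned long long esz_bp;			/* goal-driven state, bytes * 10000 */
	unsigned long long total_charged_sz, total_charged_ns;
	unsigned long long goal_current, goal_target;	/* single goal, for brevity */
};

/* hypothetical stand-in: allow 10% more when under target, 10% less when over */
static unsigned long long feedback_next_input(unsigned long long last,
					      unsigned long long score)
{
	return score < 10000 ? last + last / 10 : last - last / 10;
}

static unsigned long long effective_quota(struct quota_sketch *q)
{
	unsigned long long esz = ULLONG_MAX;
	unsigned long long throughput, score;

	if (q->goal_target) {	/* goal part, mirrors the quota->goals branch */
		score = q->goal_current * 10000 / q->goal_target;
		q->esz_bp = feedback_next_input(
				q->esz_bp > 10000 ? q->esz_bp : 10000, score);
		esz = q->esz_bp / 10000;
	}
	if (q->ms) {		/* time quota, converted via observed throughput */
		throughput = q->total_charged_ns ?
			q->total_charged_sz * 1000000 / q->total_charged_ns :
			4096 * 1024;	/* fallback, like PAGE_SIZE * 1024 */
		if (throughput * q->ms < esz)
			esz = throughput * q->ms;
	}
	if (q->sz && q->sz < esz)	/* hard size cap */
		esz = q->sz;
	return esz;
}

int main(void)
{
	struct quota_sketch q = {
		.ms = 10, .sz = 1ULL << 30,
		.esz_bp = 10000ULL * (4ULL << 20),	/* start around 4 MiB */
		.total_charged_sz = 1ULL << 20, .total_charged_ns = 1000000,
		.goal_current = 9000, .goal_target = 10000,
	};

	printf("esz = %llu bytes\n", effective_quota(&q));
	return 0;
}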
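The main-loop rework replaces the old wall-clock interval checks with counters of passed sampling intervals (the *_sis fields set up in kdamond_init_ctx() and consumed in kdamond_fn()). The sketch below only shows that bookkeeping with made-up interval values; the real kdamond additionally sleeps for the sampling interval on each step and re-reads possibly updated attributes.

/* Illustration only: sampling-interval-counted scheduling of aggregation and scheme application. */
#include <stdio.h>

int main(void)
{
	const unsigned long sample_interval = 5000;	/* us */
	const unsigned long aggr_interval = 100000;	/* us */
	const unsigned long apply_interval = 200000;	/* us, per scheme */
	unsigned long next_aggr = aggr_interval / sample_interval;	/* 20 */
	unsigned long next_apply = apply_interval / sample_interval;	/* 40 */
	unsigned long passed;

	for (passed = 1; passed <= 60; passed++) {	/* one sampling per step */
		if (passed >= next_aggr) {
			printf("sis %lu: aggregate, merge/split regions\n", passed);
			next_aggr += aggr_interval / sample_interval;
		}
		if (passed >= next_apply) {
			printf("sis %lu: apply schemes\n", passed);
			next_apply += apply_interval / sample_interval;
		}
	}
	return 0;
}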
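kdamond_merge_regions() now retries with a doubled threshold until the region count meets max_nr_regions or the threshold reaches its maximum meaningful value (aggr_interval / sample_interval). The sketch below only exercises that retry/termination logic; merge_once() fakes the effect of merging, since real merging depends on access-pattern similarity.

/* Illustration only: the retry loop, with a fake merge step and fake numbers. */
#include <stdio.h>

/* pretend that a larger threshold merges more regions away */
static unsigned int merge_once(unsigned int nr_regions, unsigned int threshold)
{
	unsigned int merged = nr_regions * 10 / (10 + threshold);
	return merged ? merged : 1;
}

int main(void)
{
	const unsigned int max_nr_regions = 100;
	const unsigned int aggr_interval = 100000, sample_interval = 5000;
	unsigned int max_thres = aggr_interval / sample_interval;	/* 20 */
	unsigned int threshold = 2, nr_regions = 1000;

	do {
		nr_regions = merge_once(nr_regions, threshold);
		printf("threshold=%u -> nr_regions=%u\n", threshold, nr_regions);
		threshold = threshold * 2 ? threshold * 2 : 1;	/* kernel: max(1, threshold * 2) */
	} while (nr_regions > max_nr_regions && threshold / 2 < max_thres);
	return 0;
}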
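The pseudo moving sum described in the damon_moving_sum() comment can be checked numerically. The program below drives the same formula the way damon_update_region_access_rate() drives nr_accesses_bp, with a made-up 5-sample window and access pattern; note how the value matches last_nr_accesses * 10000 exactly at each window boundary, as the comment claims.

/* Illustration only: pseudo moving sum of per-sample access results. */
#include <stdio.h>

static unsigned int moving_sum(unsigned int mvsum, unsigned int nomvsum,
			       unsigned int len_window, unsigned int new_value)
{
	/* drop an assumed average past value, add the new one */
	return mvsum - nomvsum / len_window + new_value;
}

int main(void)
{
	const unsigned int len_window = 5;	/* like aggr_interval / sample_interval */
	/* 1 = region seen accessed during the sampling interval, 0 = idle */
	const unsigned int samples[10] = { 1, 1, 0, 1, 0, 0, 0, 1, 0, 0 };
	unsigned int nr_accesses_bp = 0;	/* moving sum of 0/10000 samples */
	unsigned int last_nr_accesses = 0;	/* nr_accesses of the last aggregation */
	unsigned int nr_accesses = 0, i;

	for (i = 0; i < 10; i++) {
		nr_accesses_bp = moving_sum(nr_accesses_bp,
				last_nr_accesses * 10000, len_window,
				samples[i] ? 10000 : 0);
		nr_accesses += samples[i];
		if ((i + 1) % len_window == 0) {	/* aggregation boundary */
			printf("window %u: nr_accesses=%u nr_accesses_bp=%u\n",
			       (i + 1) / len_window, nr_accesses, nr_accesses_bp);
			last_nr_accesses = nr_accesses;
			nr_accesses = 0;
		}
	}
	return 0;
}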
