diff options
Diffstat (limited to 'tools/testing/selftests')
54 files changed, 3885 insertions, 582 deletions
diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 8832f3d1cb61..0e89fcff4d05 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -19,6 +19,8 @@ #include "cgroup_util.h" #include "../../clone3/clone3_selftests.h" +bool cg_test_v1_named; + /* Returns read len on success, or -errno on failure. */ ssize_t read_text(const char *path, char *buf, size_t max_len) { @@ -361,7 +363,7 @@ int cg_enter_current(const char *cgroup) int cg_enter_current_thread(const char *cgroup) { - return cg_write(cgroup, "cgroup.threads", "0"); + return cg_write(cgroup, CG_THREADS_FILE, "0"); } int cg_run(const char *cgroup, diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index adb2bc193183..c69cab66254b 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -13,6 +13,10 @@ #define TEST_UID 65534 /* usually nobody, any !root is fine */ +#define CG_THREADS_FILE (!cg_test_v1_named ? "cgroup.threads" : "tasks") +#define CG_NAMED_NAME "selftest" +#define CG_PATH_FORMAT (!cg_test_v1_named ? "0::%s" : (":name=" CG_NAMED_NAME ":%s")) + /* * Checks if two given values differ by less than err% of their sum. */ @@ -65,3 +69,4 @@ extern int dirfd_open_opath(const char *dir); extern int cg_prepare_for_wait(const char *cgroup); extern int memcg_prepare_for_wait(const char *cgroup); extern int cg_wait_for(int fd); +extern bool cg_test_v1_named; diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index a5672a91d273..a360e2eb2eef 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -5,6 +5,8 @@ #include <linux/sched.h> #include <sys/types.h> #include <sys/mman.h> +#include <sys/mount.h> +#include <sys/stat.h> #include <sys/wait.h> #include <unistd.h> #include <fcntl.h> @@ -19,6 +21,9 @@ #include "cgroup_util.h" static bool nsdelegate; +#ifndef CLONE_NEWCGROUP +#define CLONE_NEWCGROUP 0 +#endif static int touch_anon(char *buf, size_t size) { @@ -148,6 +153,9 @@ static int test_cgcore_populated(const char *root) int cgroup_fd = -EBADF; pid_t pid; + if (cg_test_v1_named) + return KSFT_SKIP; + cg_test_a = cg_name(root, "cg_test_a"); cg_test_b = cg_name(root, "cg_test_a/cg_test_b"); cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c"); @@ -277,6 +285,9 @@ static int test_cgcore_invalid_domain(const char *root) int ret = KSFT_FAIL; char *grandparent = NULL, *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + grandparent = cg_name(root, "cg_test_grandparent"); parent = cg_name(root, "cg_test_grandparent/cg_test_parent"); child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child"); @@ -339,6 +350,9 @@ static int test_cgcore_parent_becomes_threaded(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -378,7 +392,8 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; - if (cg_read_strstr(root, "cgroup.controllers", "cpu") || + if (cg_test_v1_named || + cg_read_strstr(root, "cgroup.controllers", "cpu") || cg_write(root, "cgroup.subtree_control", "+cpu")) { ret = KSFT_SKIP; goto cleanup; @@ -430,6 +445,9 @@ static int test_cgcore_top_down_constraint_enable(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -465,6 +483,9 @@ static int test_cgcore_top_down_constraint_disable(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -506,6 +527,9 @@ static int test_cgcore_internal_process_constraint(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -573,7 +597,7 @@ static int test_cgcore_proc_migration(const char *root) } cg_enter_current(dst); - if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1) + if (cg_read_lc(dst, CG_THREADS_FILE) != n_threads + 1) goto cleanup; ret = KSFT_PASS; @@ -605,7 +629,7 @@ static void *migrating_thread_fn(void *arg) char lines[3][PATH_MAX]; for (g = 1; g < 3; ++g) - snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0])); + snprintf(lines[g], sizeof(lines[g]), CG_PATH_FORMAT, grps[g] + strlen(grps[0])); for (i = 0; i < n_iterations; ++i) { cg_enter_current_thread(grps[(i % 2) + 1]); @@ -642,10 +666,12 @@ static int test_cgcore_thread_migration(const char *root) if (cg_create(grps[2])) goto cleanup; - if (cg_write(grps[1], "cgroup.type", "threaded")) - goto cleanup; - if (cg_write(grps[2], "cgroup.type", "threaded")) - goto cleanup; + if (!cg_test_v1_named) { + if (cg_write(grps[1], "cgroup.type", "threaded")) + goto cleanup; + if (cg_write(grps[2], "cgroup.type", "threaded")) + goto cleanup; + } if (cg_enter_current(grps[1])) goto cleanup; @@ -659,7 +685,7 @@ static int test_cgcore_thread_migration(const char *root) if (retval) goto cleanup; - snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0])); + snprintf(line, sizeof(line), CG_PATH_FORMAT, grps[1] + strlen(grps[0])); if (proc_read_strstr(0, 1, "cgroup", line)) goto cleanup; @@ -842,6 +868,38 @@ cleanup: return ret; } +static int setup_named_v1_root(char *root, size_t len, const char *name) +{ + char options[PATH_MAX]; + int r; + + r = snprintf(root, len, "/mnt/cg_selftest"); + if (r < 0) + return r; + + r = snprintf(options, sizeof(options), "none,name=%s", name); + if (r < 0) + return r; + + r = mkdir(root, 0755); + if (r < 0 && errno != EEXIST) + return r; + + r = mount("none", root, "cgroup", 0, options); + if (r < 0) + return r; + + return 0; +} + +static void cleanup_named_v1_root(char *root) +{ + if (!cg_test_v1_named) + return; + umount(root); + rmdir(root); +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -867,13 +925,18 @@ int main(int argc, char *argv[]) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) - ksft_exit_skip("cgroup v2 isn't mounted\n"); + if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) { + if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME)) + ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n"); + cg_test_v1_named = true; + goto post_v2_setup; + } if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) if (cg_write(root, "cgroup.subtree_control", "+memory")) ksft_exit_skip("Failed to set memory controller\n"); +post_v2_setup: for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: @@ -889,5 +952,6 @@ int main(int argc, char *argv[]) } } + cleanup_named_v1_root(root); return ret; } diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index a2b50af8e9ee..2a60e6c41940 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -2,6 +2,7 @@ #define _GNU_SOURCE #include <linux/limits.h> +#include <sys/param.h> #include <sys/sysinfo.h> #include <sys/wait.h> #include <errno.h> @@ -645,10 +646,16 @@ test_cpucg_nested_weight_underprovisioned(const char *root) static int test_cpucg_max(const char *root) { int ret = KSFT_FAIL; - long usage_usec, user_usec; - long usage_seconds = 1; - long expected_usage_usec = usage_seconds * USEC_PER_SEC; + long quota_usec = 1000; + long default_period_usec = 100000; /* cpu.max's default period */ + long duration_seconds = 1; + + long duration_usec = duration_seconds * USEC_PER_SEC; + long usage_usec, n_periods, remainder_usec, expected_usage_usec; char *cpucg; + char quota_buf[32]; + + snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec); cpucg = cg_name(root, "cpucg_test"); if (!cpucg) @@ -657,13 +664,13 @@ static int test_cpucg_max(const char *root) if (cg_create(cpucg)) goto cleanup; - if (cg_write(cpucg, "cpu.max", "1000")) + if (cg_write(cpucg, "cpu.max", quota_buf)) goto cleanup; struct cpu_hog_func_param param = { .nprocs = 1, .ts = { - .tv_sec = usage_seconds, + .tv_sec = duration_seconds, .tv_nsec = 0, }, .clock_type = CPU_HOG_CLOCK_WALL, @@ -672,14 +679,19 @@ static int test_cpucg_max(const char *root) goto cleanup; usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); - user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); - if (user_usec <= 0) + if (usage_usec <= 0) goto cleanup; - if (user_usec >= expected_usage_usec) - goto cleanup; + /* + * The following calculation applies only since + * the cpu hog is set to run as per wall-clock time + */ + n_periods = duration_usec / default_period_usec; + remainder_usec = duration_usec - n_periods * default_period_usec; + expected_usage_usec + = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (values_close(usage_usec, expected_usage_usec, 95)) + if (!values_close(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; @@ -698,10 +710,16 @@ cleanup: static int test_cpucg_max_nested(const char *root) { int ret = KSFT_FAIL; - long usage_usec, user_usec; - long usage_seconds = 1; - long expected_usage_usec = usage_seconds * USEC_PER_SEC; + long quota_usec = 1000; + long default_period_usec = 100000; /* cpu.max's default period */ + long duration_seconds = 1; + + long duration_usec = duration_seconds * USEC_PER_SEC; + long usage_usec, n_periods, remainder_usec, expected_usage_usec; char *parent, *child; + char quota_buf[32]; + + snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec); parent = cg_name(root, "cpucg_parent"); child = cg_name(parent, "cpucg_child"); @@ -717,13 +735,13 @@ static int test_cpucg_max_nested(const char *root) if (cg_create(child)) goto cleanup; - if (cg_write(parent, "cpu.max", "1000")) + if (cg_write(parent, "cpu.max", quota_buf)) goto cleanup; struct cpu_hog_func_param param = { .nprocs = 1, .ts = { - .tv_sec = usage_seconds, + .tv_sec = duration_seconds, .tv_nsec = 0, }, .clock_type = CPU_HOG_CLOCK_WALL, @@ -732,14 +750,19 @@ static int test_cpucg_max_nested(const char *root) goto cleanup; usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec"); - user_usec = cg_read_key_long(child, "cpu.stat", "user_usec"); - if (user_usec <= 0) + if (usage_usec <= 0) goto cleanup; - if (user_usec >= expected_usage_usec) - goto cleanup; + /* + * The following calculation applies only since + * the cpu hog is set to run as per wall-clock time + */ + n_periods = duration_usec / default_period_usec; + remainder_usec = duration_usec - n_periods * default_period_usec; + expected_usage_usec + = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (values_close(usage_usec, expected_usage_usec, 95)) + if (!values_close(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 96693d8772be..63b3c9aad399 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -308,6 +308,7 @@ static int test_kmem_dead_cgroups(const char *root) char *parent; long dead; int i; + int max_time = 20; parent = cg_name(root, "kmem_dead_cgroups_test"); if (!parent) @@ -322,7 +323,7 @@ static int test_kmem_dead_cgroups(const char *root) if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30)) goto cleanup; - for (i = 0; i < 5; i++) { + for (i = 0; i < max_time; i++) { dead = cg_read_key_long(parent, "cgroup.stat", "nr_dying_descendants "); if (dead == 0) { @@ -334,6 +335,8 @@ static int test_kmem_dead_cgroups(const char *root) * let's wait a bit and repeat. */ sleep(1); + if (i > 5) + printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead); } cleanup: diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index 40de679248b8..e1f578ca2841 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -338,7 +338,7 @@ static int test_zswap_writeback_one(const char *cgroup, bool wb) return -1; if (wb != !!zswpwb_after) { - ksft_print_msg("zswpwb_after is %ld while wb is %s", + ksft_print_msg("zswpwb_after is %ld while wb is %s\n", zswpwb_after, wb ? "enabled" : "disabled"); return -1; } diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index ff21524be458..5b230deb19e8 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -7,6 +7,7 @@ TEST_FILES = _damon_sysfs.py # functionality tests TEST_PROGS += sysfs.sh +TEST_PROGS += sysfs.py TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py TEST_PROGS += damos_tried_regions.py damon_nr_regions.py @@ -15,6 +16,7 @@ TEST_PROGS += reclaim.sh lru_sort.sh # regression tests (reproducers of previously found bugs) TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py +TEST_PROGS += sysfs_memcg_path_leak.sh EXTRA_CLEAN = __pycache__ diff --git a/tools/testing/selftests/damon/_common.sh b/tools/testing/selftests/damon/_common.sh new file mode 100644 index 000000000000..0279698f733e --- /dev/null +++ b/tools/testing/selftests/damon/_common.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +check_dependencies() +{ + if [ $EUID -ne 0 ] + then + echo "Run as root" + exit $ksft_skip + fi +} diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 5b1cb6b3ce4e..a0e6290833fb 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -52,9 +52,9 @@ class DamosAccessPattern: if self.size is None: self.size = [0, 2**64 - 1] if self.nr_accesses is None: - self.nr_accesses = [0, 2**64 - 1] + self.nr_accesses = [0, 2**32 - 1] if self.age is None: - self.age = [0, 2**64 - 1] + self.age = [0, 2**32 - 1] def sysfs_dir(self): return os.path.join(self.scheme.sysfs_dir(), 'access_pattern') @@ -93,14 +93,16 @@ class DamosQuotaGoal: metric = None target_value = None current_value = None + nid = None effective_bytes = None quota = None # owner quota idx = None - def __init__(self, metric, target_value=10000, current_value=0): + def __init__(self, metric, target_value=10000, current_value=0, nid=0): self.metric = metric self.target_value = target_value self.current_value = current_value + self.nid = nid def sysfs_dir(self): return os.path.join(self.quota.sysfs_dir(), 'goals', '%d' % self.idx) @@ -118,6 +120,10 @@ class DamosQuotaGoal: self.current_value) if err is not None: return err + err = write_file(os.path.join(self.sysfs_dir(), 'nid'), self.nid) + if err is not None: + return err + return None class DamosQuota: @@ -125,12 +131,20 @@ class DamosQuota: ms = None # time quota goals = None # quota goals reset_interval_ms = None # quota reset interval + weight_sz_permil = None + weight_nr_accesses_permil = None + weight_age_permil = None scheme = None # owner scheme - def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0): + def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0, + weight_sz_permil=0, weight_nr_accesses_permil=0, + weight_age_permil=0): self.sz = sz self.ms = ms self.reset_interval_ms = reset_interval_ms + self.weight_sz_permil = weight_sz_permil + self.weight_nr_accesses_permil = weight_nr_accesses_permil + self.weight_age_permil = weight_age_permil self.goals = goals if goals is not None else [] for idx, goal in enumerate(self.goals): goal.idx = idx @@ -151,6 +165,20 @@ class DamosQuota: if err is not None: return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'sz_permil'), self.weight_sz_permil) + if err is not None: + return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'nr_accesses_permil'), + self.weight_nr_accesses_permil) + if err is not None: + return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'age_permil'), self.weight_age_permil) + if err is not None: + return err + nr_goals_file = os.path.join(self.sysfs_dir(), 'goals', 'nr_goals') content, err = read_file(nr_goals_file) if err is not None: @@ -165,6 +193,178 @@ class DamosQuota: return err return None +class DamosWatermarks: + metric = None + interval = None + high = None + mid = None + low = None + scheme = None # owner scheme + + def __init__(self, metric='none', interval=0, high=0, mid=0, low=0): + self.metric = metric + self.interval = interval + self.high = high + self.mid = mid + self.low = low + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'watermarks') + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'metric'), self.metric) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'interval_us'), + self.interval) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'high'), self.high) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'mid'), self.mid) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'low'), self.low) + if err is not None: + return err + +class DamosFilter: + type_ = None + matching = None + allow = None + memcg_path = None + addr_start = None + addr_end = None + target_idx = None + min_ = None + max_ = None + idx = None + filters = None # owner filters + + def __init__(self, type_='anon', matching=False, allow=False, + memcg_path='', addr_start=0, addr_end=0, target_idx=0, min_=0, + max_=0): + self.type_ = type_ + self.matching = matching + self.allow = allow + self.memcg_path = memcg_path, + self.addr_start = addr_start + self.addr_end = addr_end + self.target_idx = target_idx + self.min_ = min_ + self.max_ = max_ + + def sysfs_dir(self): + return os.path.join(self.filters.sysfs_dir(), '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'type'), self.type_) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'matching'), + self.matching) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'allow'), self.allow) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'memcg_path'), + self.memcg_path) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'addr_start'), + self.addr_start) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'addr_end'), + self.addr_end) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'damon_target_idx'), + self.target_idx) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'min'), self.min_) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'max'), self.max_) + if err is not None: + return err + return None + +class DamosFilters: + name = None + filters = None + scheme = None # owner scheme + + def __init__(self, name, filters=[]): + self.name = name + self.filters = filters + for idx, filter_ in enumerate(self.filters): + filter_.idx = idx + filter_.filters = self + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), self.name) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_filters'), + len(self.filters)) + if err is not None: + return err + for filter_ in self.filters: + err = filter_.stage() + if err is not None: + return err + return None + +class DamosDest: + id = None + weight = None + idx = None + dests = None # owner dests + + def __init__(self, id=0, weight=0): + self.id = id + self.weight = weight + + def sysfs_dir(self): + return os.path.join(self.dests.sysfs_dir(), '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'id'), self.id) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'weight'), self.weight) + if err is not None: + return err + return None + +class DamosDests: + dests = None + scheme = None # owner scheme + + def __init__(self, dests=[]): + self.dests = dests + for idx, dest in enumerate(self.dests): + dest.idx = idx + dest.dests = self + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'dests') + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_dests'), + len(self.dests)) + if err is not None: + return err + for dest in self.dests: + err = dest.stage() + if err is not None: + return err + return None + class DamosStats: nr_tried = None sz_tried = None @@ -190,8 +390,13 @@ class Damos: action = None access_pattern = None quota = None + watermarks = None + core_filters = None + ops_filters = None + filters = None apply_interval_us = None - # todo: Support watermarks, stats + target_nid = None + dests = None idx = None context = None tried_bytes = None @@ -199,12 +404,30 @@ class Damos: tried_regions = None def __init__(self, action='stat', access_pattern=DamosAccessPattern(), - quota=DamosQuota(), apply_interval_us=0): + quota=DamosQuota(), watermarks=DamosWatermarks(), + core_filters=[], ops_filters=[], filters=[], target_nid=0, + dests=DamosDests(), apply_interval_us=0): self.action = action self.access_pattern = access_pattern self.access_pattern.scheme = self self.quota = quota self.quota.scheme = self + self.watermarks = watermarks + self.watermarks.scheme = self + + self.core_filters = DamosFilters(name='core_filters', + filters=core_filters) + self.core_filters.scheme = self + self.ops_filters = DamosFilters(name='ops_filters', + filters=ops_filters) + self.ops_filters.scheme = self + self.filters = DamosFilters(name='filters', filters=filters) + self.filters.scheme = self + + self.target_nid = target_nid + self.dests = dests + self.dests.scheme = self + self.apply_interval_us = apply_interval_us def sysfs_dir(self): @@ -227,15 +450,26 @@ class Damos: if err is not None: return err - # disable watermarks - err = write_file( - os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none') + err = self.watermarks.stage() if err is not None: return err - # disable filters - err = write_file( - os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0') + err = self.core_filters.stage() + if err is not None: + return err + err = self.ops_filters.stage() + if err is not None: + return err + err = self.filters.stage() + if err is not None: + return err + + err = write_file(os.path.join(self.sysfs_dir(), 'target_nid'), '%d' % + self.target_nid) + if err is not None: + return err + + err = self.dests.stage() if err is not None: return err @@ -260,18 +494,56 @@ class DamonTarget: return write_file( os.path.join(self.sysfs_dir(), 'pid_target'), self.pid) +class IntervalsGoal: + access_bp = None + aggrs = None + min_sample_us = None + max_sample_us = None + attrs = None # owner DamonAttrs + + def __init__(self, access_bp=0, aggrs=0, min_sample_us=0, max_sample_us=0): + self.access_bp = access_bp + self.aggrs = aggrs + self.min_sample_us = min_sample_us + self.max_sample_us = max_sample_us + + def sysfs_dir(self): + return os.path.join(self.attrs.interval_sysfs_dir(), 'intervals_goal') + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'access_bp'), self.access_bp) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'aggrs'), self.aggrs) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'min_sample_us'), + self.min_sample_us) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'max_sample_us'), + self.max_sample_us) + if err is not None: + return err + return None + class DamonAttrs: sample_us = None aggr_us = None + intervals_goal = None update_us = None min_nr_regions = None max_nr_regions = None context = None - def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000, + def __init__(self, sample_us=5000, aggr_us=100000, + intervals_goal=IntervalsGoal(), update_us=1000000, min_nr_regions=10, max_nr_regions=1000): self.sample_us = sample_us self.aggr_us = aggr_us + self.intervals_goal = intervals_goal + self.intervals_goal.attrs = self self.update_us = update_us self.min_nr_regions = min_nr_regions self.max_nr_regions = max_nr_regions @@ -293,6 +565,9 @@ class DamonAttrs: self.aggr_us) if err is not None: return err + err = self.intervals_goal.stage() + if err is not None: + return err err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'), self.update_us) if err is not None: @@ -408,6 +683,9 @@ class Kdamond: if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') + if err is not None: + return err + self.pid, err = read_file(os.path.join(self.sysfs_dir(), 'pid')) return err def stop(self): diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py new file mode 100755 index 000000000000..7233369a3a44 --- /dev/null +++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py @@ -0,0 +1,222 @@ +#!/usr/bin/env drgn +# SPDX-License-Identifier: GPL-2.0 + +''' +Read DAMON context data and dump as a json string. +''' +import drgn +from drgn import FaultError, NULL, Object, cast, container_of, execscript, offsetof, reinterpret, sizeof +from drgn.helpers.common import * +from drgn.helpers.linux import * + +import json +import sys + +if "prog" not in globals(): + try: + prog = drgn.get_default_prog() + except drgn.NoDefaultProgramError: + prog = drgn.program_from_kernel() + drgn.set_default_prog(prog) + +def to_dict(object, attr_name_converter): + d = {} + for attr_name, converter in attr_name_converter: + d[attr_name] = converter(getattr(object, attr_name)) + return d + +def ops_to_dict(ops): + return to_dict(ops, [ + ['id', int], + ]) + +def intervals_goal_to_dict(goal): + return to_dict(goal, [ + ['access_bp', int], + ['aggrs', int], + ['min_sample_us', int], + ['max_sample_us', int], + ]) + +def attrs_to_dict(attrs): + return to_dict(attrs, [ + ['sample_interval', int], + ['aggr_interval', int], + ['ops_update_interval', int], + ['intervals_goal', intervals_goal_to_dict], + ['min_nr_regions', int], + ['max_nr_regions', int], + ]) + +def addr_range_to_dict(addr_range): + return to_dict(addr_range, [ + ['start', int], + ['end', int], + ]) + +def region_to_dict(region): + return to_dict(region, [ + ['ar', addr_range_to_dict], + ['sampling_addr', int], + ['nr_accesses', int], + ['nr_accesses_bp', int], + ['age', int], + ]) + +def regions_to_list(regions): + return [region_to_dict(r) + for r in list_for_each_entry( + 'struct damon_region', regions.address_of_(), 'list')] + +def target_to_dict(target): + return to_dict(target, [ + ['pid', int], + ['nr_regions', int], + ['regions_list', regions_to_list], + ]) + +def targets_to_list(targets): + return [target_to_dict(t) + for t in list_for_each_entry( + 'struct damon_target', targets.address_of_(), 'list')] + +def damos_access_pattern_to_dict(pattern): + return to_dict(pattern, [ + ['min_sz_region', int], + ['max_sz_region', int], + ['min_nr_accesses', int], + ['max_nr_accesses', int], + ['min_age_region', int], + ['max_age_region', int], + ]) + +def damos_quota_goal_to_dict(goal): + return to_dict(goal, [ + ['metric', int], + ['target_value', int], + ['current_value', int], + ['last_psi_total', int], + ['nid', int], + ]) + +def damos_quota_goals_to_list(goals): + return [damos_quota_goal_to_dict(g) + for g in list_for_each_entry( + 'struct damos_quota_goal', goals.address_of_(), 'list')] + +def damos_quota_to_dict(quota): + return to_dict(quota, [ + ['reset_interval', int], + ['ms', int], ['sz', int], + ['goals', damos_quota_goals_to_list], + ['esz', int], + ['weight_sz', int], + ['weight_nr_accesses', int], + ['weight_age', int], + ]) + +def damos_watermarks_to_dict(watermarks): + return to_dict(watermarks, [ + ['metric', int], + ['interval', int], + ['high', int], ['mid', int], ['low', int], + ]) + +def damos_migrate_dests_to_dict(dests): + nr_dests = int(dests.nr_dests) + node_id_arr = [] + weight_arr = [] + for i in range(nr_dests): + node_id_arr.append(int(dests.node_id_arr[i])) + weight_arr.append(int(dests.weight_arr[i])) + return { + 'node_id_arr': node_id_arr, + 'weight_arr': weight_arr, + 'nr_dests': nr_dests, + } + +def damos_filter_to_dict(damos_filter): + filter_type_keyword = { + 0: 'anon', + 1: 'active', + 2: 'memcg', + 3: 'young', + 4: 'hugepage_size', + 5: 'unmapped', + 6: 'addr', + 7: 'target' + } + dict_ = { + 'type': filter_type_keyword[int(damos_filter.type)], + 'matching': bool(damos_filter.matching), + 'allow': bool(damos_filter.allow), + } + type_ = dict_['type'] + if type_ == 'memcg': + dict_['memcg_id'] = int(damos_filter.memcg_id) + elif type_ == 'addr': + dict_['addr_range'] = [int(damos_filter.addr_range.start), + int(damos_filter.addr_range.end)] + elif type_ == 'target': + dict_['target_idx'] = int(damos_filter.target_idx) + elif type_ == 'hugeapge_size': + dict_['sz_range'] = [int(damos_filter.sz_range.min), + int(damos_filter.sz_range.max)] + return dict_ + +def scheme_to_dict(scheme): + dict_ = to_dict(scheme, [ + ['pattern', damos_access_pattern_to_dict], + ['action', int], + ['apply_interval_us', int], + ['quota', damos_quota_to_dict], + ['wmarks', damos_watermarks_to_dict], + ['target_nid', int], + ['migrate_dests', damos_migrate_dests_to_dict], + ]) + filters = [] + for f in list_for_each_entry( + 'struct damos_filter', scheme.filters.address_of_(), 'list'): + filters.append(damos_filter_to_dict(f)) + dict_['filters'] = filters + ops_filters = [] + for f in list_for_each_entry( + 'struct damos_filter', scheme.ops_filters.address_of_(), 'list'): + ops_filters.append(damos_filter_to_dict(f)) + dict_['ops_filters'] = ops_filters + + return dict_ + +def schemes_to_list(schemes): + return [scheme_to_dict(s) + for s in list_for_each_entry( + 'struct damos', schemes.address_of_(), 'list')] + +def damon_ctx_to_dict(ctx): + return to_dict(ctx, [ + ['ops', ops_to_dict], + ['attrs', attrs_to_dict], + ['adaptive_targets', targets_to_list], + ['schemes', schemes_to_list], + ]) + +def main(): + if len(sys.argv) < 3: + print('Usage: %s <kdamond pid> <file>' % sys.argv[0]) + exit(1) + + pid = int(sys.argv[1]) + file_to_store = sys.argv[2] + + kthread_data = cast('struct kthread *', + find_task(prog, pid).worker_private).data + ctx = cast('struct damon_ctx *', kthread_data) + status = {'contexts': [damon_ctx_to_dict(ctx)]} + if file_to_store == 'stdout': + print(json.dumps(status, indent=4)) + else: + with open(file_to_store, 'w') as f: + json.dump(status, f, indent=4) + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh index 61b80197c896..1e4849db78a9 100755 --- a/tools/testing/selftests/damon/lru_sort.sh +++ b/tools/testing/selftests/damon/lru_sort.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled" if [ ! -f "$damon_lru_sort_enabled" ] diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh index 78dbc2334cbe..e56ceb035129 100755 --- a/tools/testing/selftests/damon/reclaim.sh +++ b/tools/testing/selftests/damon/reclaim.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled" if [ ! -f "$damon_reclaim_enabled" ] diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py new file mode 100755 index 000000000000..2666c6f0f1a5 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import json +import os +import subprocess + +import _damon_sysfs + +def dump_damon_status_dict(pid): + try: + subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL) + except: + return None, 'drgn not found' + file_dir = os.path.dirname(os.path.abspath(__file__)) + dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py') + rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'], + stderr=subprocess.DEVNULL) + if rc != 0: + return None, 'drgn fail' + try: + with open('damon_dump_output', 'r') as f: + return json.load(f), None + except Exception as e: + return None, 'json.load fail (%s)' % e + +def fail(expectation, status): + print('unexpected %s' % expectation) + print(json.dumps(status, indent=4)) + exit(1) + +def assert_true(condition, expectation, status): + if condition is not True: + fail(expectation, status) + +def assert_watermarks_committed(watermarks, dump): + wmark_metric_val = { + 'none': 0, + 'free_mem_rate': 1, + } + assert_true(dump['metric'] == wmark_metric_val[watermarks.metric], + 'metric', dump) + assert_true(dump['interval'] == watermarks.interval, 'interval', dump) + assert_true(dump['high'] == watermarks.high, 'high', dump) + assert_true(dump['mid'] == watermarks.mid, 'mid', dump) + assert_true(dump['low'] == watermarks.low, 'low', dump) + +def assert_quota_goal_committed(qgoal, dump): + metric_val = { + 'user_input': 0, + 'some_mem_psi_us': 1, + 'node_mem_used_bp': 2, + 'node_mem_free_bp': 3, + } + assert_true(dump['metric'] == metric_val[qgoal.metric], 'metric', dump) + assert_true(dump['target_value'] == qgoal.target_value, 'target_value', + dump) + if qgoal.metric == 'user_input': + assert_true(dump['current_value'] == qgoal.current_value, + 'current_value', dump) + assert_true(dump['nid'] == qgoal.nid, 'nid', dump) + +def assert_quota_committed(quota, dump): + assert_true(dump['reset_interval'] == quota.reset_interval_ms, + 'reset_interval', dump) + assert_true(dump['ms'] == quota.ms, 'ms', dump) + assert_true(dump['sz'] == quota.sz, 'sz', dump) + for idx, qgoal in enumerate(quota.goals): + assert_quota_goal_committed(qgoal, dump['goals'][idx]) + assert_true(dump['weight_sz'] == quota.weight_sz_permil, 'weight_sz', dump) + assert_true(dump['weight_nr_accesses'] == quota.weight_nr_accesses_permil, + 'weight_nr_accesses', dump) + assert_true( + dump['weight_age'] == quota.weight_age_permil, 'weight_age', dump) + + +def assert_migrate_dests_committed(dests, dump): + assert_true(dump['nr_dests'] == len(dests.dests), 'nr_dests', dump) + for idx, dest in enumerate(dests.dests): + assert_true(dump['node_id_arr'][idx] == dest.id, 'node_id', dump) + assert_true(dump['weight_arr'][idx] == dest.weight, 'weight', dump) + +def assert_filter_committed(filter_, dump): + assert_true(filter_.type_ == dump['type'], 'type', dump) + assert_true(filter_.matching == dump['matching'], 'matching', dump) + assert_true(filter_.allow == dump['allow'], 'allow', dump) + # TODO: check memcg_path and memcg_id if type is memcg + if filter_.type_ == 'addr': + assert_true([filter_.addr_start, filter_.addr_end] == + dump['addr_range'], 'addr_range', dump) + elif filter_.type_ == 'target': + assert_true(filter_.target_idx == dump['target_idx'], 'target_idx', + dump) + elif filter_.type_ == 'hugepage_size': + assert_true([filter_.min_, filter_.max_] == dump['sz_range'], + 'sz_range', dump) + +def assert_access_pattern_committed(pattern, dump): + assert_true(dump['min_sz_region'] == pattern.size[0], 'min_sz_region', + dump) + assert_true(dump['max_sz_region'] == pattern.size[1], 'max_sz_region', + dump) + assert_true(dump['min_nr_accesses'] == pattern.nr_accesses[0], + 'min_nr_accesses', dump) + assert_true(dump['max_nr_accesses'] == pattern.nr_accesses[1], + 'max_nr_accesses', dump) + assert_true(dump['min_age_region'] == pattern.age[0], 'min_age_region', + dump) + assert_true(dump['max_age_region'] == pattern.age[1], 'miaxage_region', + dump) + +def assert_scheme_committed(scheme, dump): + assert_access_pattern_committed(scheme.access_pattern, dump['pattern']) + action_val = { + 'willneed': 0, + 'cold': 1, + 'pageout': 2, + 'hugepage': 3, + 'nohugeapge': 4, + 'lru_prio': 5, + 'lru_deprio': 6, + 'migrate_hot': 7, + 'migrate_cold': 8, + 'stat': 9, + } + assert_true(dump['action'] == action_val[scheme.action], 'action', dump) + assert_true(dump['apply_interval_us'] == scheme. apply_interval_us, + 'apply_interval_us', dump) + assert_true(dump['target_nid'] == scheme.target_nid, 'target_nid', dump) + assert_migrate_dests_committed(scheme.dests, dump['migrate_dests']) + assert_quota_committed(scheme.quota, dump['quota']) + assert_watermarks_committed(scheme.watermarks, dump['wmarks']) + # TODO: test filters directory + for idx, f in enumerate(scheme.core_filters.filters): + assert_filter_committed(f, dump['filters'][idx]) + for idx, f in enumerate(scheme.ops_filters.filters): + assert_filter_committed(f, dump['ops_filters'][idx]) + +def assert_schemes_committed(schemes, dump): + assert_true(len(schemes) == len(dump), 'len_schemes', dump) + for idx, scheme in enumerate(schemes): + assert_scheme_committed(scheme, dump[idx]) + +def assert_monitoring_attrs_committed(attrs, dump): + assert_true(dump['sample_interval'] == attrs.sample_us, 'sample_interval', + dump) + assert_true(dump['aggr_interval'] == attrs.aggr_us, 'aggr_interval', dump) + assert_true(dump['intervals_goal']['access_bp'] == + attrs.intervals_goal.access_bp, 'access_bp', + dump['intervals_goal']) + assert_true(dump['intervals_goal']['aggrs'] == attrs.intervals_goal.aggrs, + 'aggrs', dump['intervals_goal']) + assert_true(dump['intervals_goal']['min_sample_us'] == + attrs.intervals_goal.min_sample_us, 'min_sample_us', + dump['intervals_goal']) + assert_true(dump['intervals_goal']['max_sample_us'] == + attrs.intervals_goal.max_sample_us, 'max_sample_us', + dump['intervals_goal']) + + assert_true(dump['ops_update_interval'] == attrs.update_us, + 'ops_update_interval', dump) + assert_true(dump['min_nr_regions'] == attrs.min_nr_regions, + 'min_nr_regions', dump) + assert_true(dump['max_nr_regions'] == attrs.max_nr_regions, + 'max_nr_regions', dump) + +def assert_ctx_committed(ctx, dump): + ops_val = { + 'vaddr': 0, + 'fvaddr': 1, + 'paddr': 2, + } + assert_true(dump['ops']['id'] == ops_val[ctx.ops], 'ops_id', dump) + assert_monitoring_attrs_committed(ctx.monitoring_attrs, dump['attrs']) + assert_schemes_committed(ctx.schemes, dump['schemes']) + +def assert_ctxs_committed(ctxs, dump): + assert_true(len(ctxs) == len(dump), 'ctxs length', dump) + for idx, ctx in enumerate(ctxs): + assert_ctx_committed(ctx, dump[idx]) + +def main(): + kdamonds = _damon_sysfs.Kdamonds( + [_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + targets=[_damon_sysfs.DamonTarget(pid=-1)], + schemes=[_damon_sysfs.Damos()], + )])]) + err = kdamonds.start() + if err is not None: + print('kdamond start failed: %s' % err) + exit(1) + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + kdamonds.stop() + exit(1) + + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + + context = _damon_sysfs.DamonCtx( + monitoring_attrs=_damon_sysfs.DamonAttrs( + sample_us=100000, aggr_us=2000000, + intervals_goal=_damon_sysfs.IntervalsGoal( + access_bp=400, aggrs=3, min_sample_us=5000, + max_sample_us=10000000), + update_us=2000000), + schemes=[_damon_sysfs.Damos( + action='pageout', + access_pattern=_damon_sysfs.DamosAccessPattern( + size=[4096, 2**10], + nr_accesses=[3, 317], + age=[5,71]), + quota=_damon_sysfs.DamosQuota( + sz=100*1024*1024, ms=100, + goals=[_damon_sysfs.DamosQuotaGoal( + metric='node_mem_used_bp', + target_value=9950, + nid=1)], + reset_interval_ms=1500, + weight_sz_permil=20, + weight_nr_accesses_permil=200, + weight_age_permil=1000), + watermarks=_damon_sysfs.DamosWatermarks( + metric = 'free_mem_rate', interval = 500000, # 500 ms + high = 500, mid = 400, low = 50), + target_nid=1, + apply_interval_us=1000000, + dests=_damon_sysfs.DamosDests( + dests=[_damon_sysfs.DamosDest(id=1, weight=30), + _damon_sysfs.DamosDest(id=0, weight=70)]), + core_filters=[ + _damon_sysfs.DamosFilter(type_='addr', matching=True, + allow=False, addr_start=42, + addr_end=4242), + ], + ops_filters=[ + _damon_sysfs.DamosFilter(type_='anon', matching=True, + allow=True), + ], + )]) + context.idx = 0 + context.kdamond = kdamonds.kdamonds[0] + kdamonds.kdamonds[0].contexts = [context] + kdamonds.kdamonds[0].commit() + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + exit(1) + + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + + # test online commitment of minimum context. + context = _damon_sysfs.DamonCtx() + context.idx = 0 + context.kdamond = kdamonds.kdamonds[0] + kdamonds.kdamonds[0].contexts = [context] + kdamonds.kdamonds[0].commit() + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + exit(1) + + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + + kdamonds.stop() + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index e9a976d296e2..83e3b7f63d81 100755 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest frmework requirement - SKIP code is 4. ksft_skip=4 @@ -364,14 +366,5 @@ test_damon_sysfs() test_kdamonds "$damon_sysfs/kdamonds" } -check_dependencies() -{ - if [ $EUID -ne 0 ] - then - echo "Run as root" - exit $ksft_skip - fi -} - check_dependencies test_damon_sysfs "/sys/kernel/mm/damon/admin" diff --git a/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh new file mode 100755 index 000000000000..64c5d8c518a4 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_sysfs="/sys/kernel/mm/damon/admin" +if [ ! -d "$damon_sysfs" ] +then + echo "damon sysfs not found" + exit $ksft_skip +fi + +# ensure filter directory +echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/nr_filters" + +filter_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/0" + +before_kb=$(grep Slab /proc/meminfo | awk '{print $2}') + +# try to leak 3000 KiB +for i in {1..102400}; +do + echo "012345678901234567890123456789" > "$filter_dir/memcg_path" +done + +after_kb=$(grep Slab /proc/meminfo | awk '{print $2}') +# expect up to 1500 KiB free from other tasks memory +expected_after_kb_max=$((before_kb + 1500)) + +if [ "$after_kb" -gt "$expected_after_kb_max" ] +then + echo "maybe memcg_path are leaking: $before_kb -> $after_kb" + exit 1 +else + exit 0 +fi diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh index ade35576e748..35fc32beeaf7 100755 --- a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh +++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_sysfs="/sys/kernel/mm/damon/admin" if [ ! -d "$damon_sysfs" ] diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index 6062723a172e..77aa2897e79f 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -138,7 +138,7 @@ int main(int argc, char *argv[]) void *addr1, *addr2; ksft_print_header(); - ksft_set_plan(6); + ksft_set_plan(7); devfd = open("/dev/udmabuf", O_RDWR); if (devfd < 0) { @@ -250,6 +250,24 @@ int main(int argc, char *argv[]) close(buf); close(memfd); + + /* same test as above but we pin first before writing to memfd */ + page_size = getpagesize() * 512; /* 2 MB */ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, true); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-7]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-7]\n", TEST_PREFIX); + + close(buf); + close(memfd); close(devfd); ksft_print_msg("%s: ok\n", TEST_PREFIX); diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py index 3a465768e507..5175cf235b2f 100644 --- a/tools/testing/selftests/hid/tests/base.py +++ b/tools/testing/selftests/hid/tests/base.py @@ -5,6 +5,7 @@ # Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com> # Copyright (c) 2017 Red Hat, Inc. +import dataclasses import libevdev import os import pytest @@ -145,6 +146,18 @@ class UHIDTestDevice(BaseDevice): self.name = name +@dataclasses.dataclass +class HidBpf: + object_name: str + has_rdesc_fixup: bool + + +@dataclasses.dataclass +class KernelModule: + driver_name: str + module_name: str + + class BaseTestCase: class TestUhid(object): syn_event = libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) # type: ignore @@ -155,20 +168,20 @@ class BaseTestCase: # List of kernel modules to load before starting the test # if any module is not available (not compiled), the test will skip. - # Each element is a tuple '(kernel driver name, kernel module)', - # for example ("playstation", "hid-playstation") - kernel_modules: List[Tuple[str, str]] = [] + # Each element is a KernelModule object, for example + # KernelModule("playstation", "hid-playstation") + kernel_modules: List[KernelModule] = [] # List of in kernel HID-BPF object files to load # before starting the test # Any existing pre-loaded HID-BPF module will be removed # before the ones in this list will be manually loaded. - # Each Element is a tuple '(hid_bpf_object, rdesc_fixup_present)', - # for example '("xppen-ArtistPro16Gen2.bpf.o", True)' - # If 'rdesc_fixup_present' is True, the test needs to wait + # Each Element is a HidBpf object, for example + # 'HidBpf("xppen-ArtistPro16Gen2.bpf.o", True)' + # If 'has_rdesc_fixup' is True, the test needs to wait # for one unbind and rebind before it can be sure the kernel is # ready - hid_bpfs: List[Tuple[str, bool]] = [] + hid_bpfs: List[HidBpf] = [] def assertInputEventsIn(self, expected_events, effective_events): effective_events = effective_events.copy() @@ -232,25 +245,26 @@ class BaseTestCase: @pytest.fixture() def load_kernel_module(self): - for kernel_driver, kernel_module in self.kernel_modules: - self._load_kernel_module(kernel_driver, kernel_module) + for k in self.kernel_modules: + self._load_kernel_module(k.driver_name, k.module_name) yield def load_hid_bpfs(self): + # this function will only work when run in the kernel tree script_dir = Path(os.path.dirname(os.path.realpath(__file__))) root_dir = (script_dir / "../../../../..").resolve() bpf_dir = root_dir / "drivers/hid/bpf/progs" + if not bpf_dir.exists(): + pytest.skip("looks like we are not in the kernel tree, skipping") + udev_hid_bpf = shutil.which("udev-hid-bpf") if not udev_hid_bpf: pytest.skip("udev-hid-bpf not found in $PATH, skipping") - wait = False - for _, rdesc_fixup in self.hid_bpfs: - if rdesc_fixup: - wait = True + wait = any(b.has_rdesc_fixup for b in self.hid_bpfs) - for hid_bpf, _ in self.hid_bpfs: + for hid_bpf in self.hid_bpfs: # We need to start `udev-hid-bpf` in the background # and dispatch uhid events in case the kernel needs # to fetch features on the device @@ -260,13 +274,13 @@ class BaseTestCase: "--verbose", "add", str(self.uhdev.sys_path), - str(bpf_dir / hid_bpf), + str(bpf_dir / hid_bpf.object_name), ], ) while process.poll() is None: self.uhdev.dispatch(1) - if process.poll() != 0: + if process.returncode != 0: pytest.fail( f"Couldn't insert hid-bpf program '{hid_bpf}', marking the test as failed" ) diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py index e0515be97f83..59465c58d94d 100644 --- a/tools/testing/selftests/hid/tests/base_device.py +++ b/tools/testing/selftests/hid/tests/base_device.py @@ -18,10 +18,12 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +import dataclasses import fcntl import functools import libevdev import os +import threading try: import pyudev @@ -104,6 +106,12 @@ class PowerSupply(object): return self._type.str_value +@dataclasses.dataclass +class HidReadiness: + is_ready: bool = False + count: int = 0 + + class HIDIsReady(object): """ Companion class that binds to a kernel mechanism @@ -115,18 +123,18 @@ class HIDIsReady(object): def __init__(self: "HIDIsReady", uhid: UHIDDevice) -> None: self.uhid = uhid - def is_ready(self: "HIDIsReady") -> bool: + def is_ready(self: "HIDIsReady") -> HidReadiness: """ Overwrite in subclasses: should return True or False whether the attached uhid device is ready or not. """ - return False + return HidReadiness() class UdevHIDIsReady(HIDIsReady): _pyudev_context: ClassVar[Optional[pyudev.Context]] = None _pyudev_monitor: ClassVar[Optional[pyudev.Monitor]] = None - _uhid_devices: ClassVar[Dict[int, Tuple[bool, int]]] = {} + _uhid_devices: ClassVar[Dict[int, HidReadiness]] = {} def __init__(self: "UdevHIDIsReady", uhid: UHIDDevice) -> None: super().__init__(uhid) @@ -157,18 +165,19 @@ class UdevHIDIsReady(HIDIsReady): id = int(event.sys_path.strip().split(".")[-1], 16) - device_ready, count = cls._uhid_devices.get(id, (False, 0)) + readiness = cls._uhid_devices.setdefault(id, HidReadiness()) ready = event.action == "bind" - if not device_ready and ready: - count += 1 - cls._uhid_devices[id] = (ready, count) + if not readiness.is_ready and ready: + readiness.count += 1 + + readiness.is_ready = ready - def is_ready(self: "UdevHIDIsReady") -> Tuple[bool, int]: + def is_ready(self: "UdevHIDIsReady") -> HidReadiness: try: return self._uhid_devices[self.uhid.hid_id] except KeyError: - return (False, 0) + return HidReadiness() class EvdevMatch(object): @@ -322,11 +331,11 @@ class BaseDevice(UHIDDevice): @property def kernel_is_ready(self: "BaseDevice") -> bool: - return self._kernel_is_ready.is_ready()[0] and self.started + return self._kernel_is_ready.is_ready().is_ready and self.started @property def kernel_ready_count(self: "BaseDevice") -> int: - return self._kernel_is_ready.is_ready()[1] + return self._kernel_is_ready.is_ready().count @property def input_nodes(self: "BaseDevice") -> List[EvdevDevice]: @@ -336,10 +345,28 @@ class BaseDevice(UHIDDevice): if not self.kernel_is_ready or not self.started: return [] + # Starting with kernel v6.16, an event is emitted when + # userspace opens a kernel device, and for some devices + # this translates into a SET_REPORT. + # Because EvdevDevice(path) opens every single evdev node + # we need to have a separate thread to process the incoming + # SET_REPORT or we end up having to wait for the kernel + # timeout of 5 seconds. + done = False + + def dispatch(): + while not done: + self.dispatch(1) + + t = threading.Thread(target=dispatch) + t.start() + self._input_nodes = [ EvdevDevice(path) for path in self.walk_sysfs("input", "input/input*/event*") ] + done = True + t.join() return self._input_nodes def match_evdev_rule(self, application, evdev): diff --git a/tools/testing/selftests/hid/tests/test_apple_keyboard.py b/tools/testing/selftests/hid/tests/test_apple_keyboard.py index f81071d46166..0e17588b945c 100644 --- a/tools/testing/selftests/hid/tests/test_apple_keyboard.py +++ b/tools/testing/selftests/hid/tests/test_apple_keyboard.py @@ -8,13 +8,14 @@ from .test_keyboard import ArrayKeyboard, TestArrayKeyboard from hidtools.util import BusType +from . import base import libevdev import logging logger = logging.getLogger("hidtools.test.apple-keyboard") -KERNEL_MODULE = ("apple", "hid-apple") +KERNEL_MODULE = base.KernelModule("apple", "hid-apple") class KbdData(object): diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py index 8d5b5ffdae49..612197805931 100644 --- a/tools/testing/selftests/hid/tests/test_gamepad.py +++ b/tools/testing/selftests/hid/tests/test_gamepad.py @@ -12,6 +12,7 @@ import pytest from .base_gamepad import BaseGamepad, JoystickGamepad, AxisMapping from hidtools.util import BusType +from .base import HidBpf import logging @@ -654,7 +655,7 @@ class TestAsusGamepad(BaseTest.TestGamepad): class TestRaptorMach2Joystick(BaseTest.TestGamepad): - hid_bpfs = [("FR-TEC__Raptor-Mach-2.bpf.o", True)] + hid_bpfs = [HidBpf("FR-TEC__Raptor-Mach-2.bpf.o", True)] def create_device(self): return RaptorMach2Joystick( diff --git a/tools/testing/selftests/hid/tests/test_ite_keyboard.py b/tools/testing/selftests/hid/tests/test_ite_keyboard.py index 38550c167bae..f695eaad1648 100644 --- a/tools/testing/selftests/hid/tests/test_ite_keyboard.py +++ b/tools/testing/selftests/hid/tests/test_ite_keyboard.py @@ -11,10 +11,11 @@ from hidtools.util import BusType import libevdev import logging +from . import base logger = logging.getLogger("hidtools.test.ite-keyboard") -KERNEL_MODULE = ("itetech", "hid_ite") +KERNEL_MODULE = base.KernelModule("itetech", "hid_ite") class KbdData(object): diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py index 4265012231c6..5d2ffa3d5977 100644 --- a/tools/testing/selftests/hid/tests/test_multitouch.py +++ b/tools/testing/selftests/hid/tests/test_multitouch.py @@ -17,7 +17,7 @@ import time logger = logging.getLogger("hidtools.test.multitouch") -KERNEL_MODULE = ("hid-multitouch", "hid_multitouch") +KERNEL_MODULE = base.KernelModule("hid-multitouch", "hid_multitouch") def BIT(x): diff --git a/tools/testing/selftests/hid/tests/test_sony.py b/tools/testing/selftests/hid/tests/test_sony.py index 7e52c28e59c5..7fd3a8e6137d 100644 --- a/tools/testing/selftests/hid/tests/test_sony.py +++ b/tools/testing/selftests/hid/tests/test_sony.py @@ -7,6 +7,7 @@ # from .base import application_matches +from .base import KernelModule from .test_gamepad import BaseTest from hidtools.device.sony_gamepad import ( PS3Controller, @@ -24,9 +25,9 @@ import pytest logger = logging.getLogger("hidtools.test.sony") -PS3_MODULE = ("sony", "hid_sony") -PS4_MODULE = ("playstation", "hid_playstation") -PS5_MODULE = ("playstation", "hid_playstation") +PS3_MODULE = KernelModule("sony", "hid_sony") +PS4_MODULE = KernelModule("playstation", "hid_playstation") +PS5_MODULE = KernelModule("playstation", "hid_playstation") class SonyBaseTest: diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index a9e2de1e8861..50d5699812bb 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -10,6 +10,7 @@ from . import base import copy from enum import Enum from hidtools.util import BusType +from .base import HidBpf import libevdev import logging import pytest @@ -1228,9 +1229,9 @@ class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer): pen.current_state = state def call_input_event(self, report): - if report[0] == 0x0a: + if report[0] == 0x0A: # ensures the original second Eraser usage is null - report[1] &= 0xdf + report[1] &= 0xDF # ensures the original last bit is equal to bit 6 (In Range) if report[1] & 0x40: @@ -1472,7 +1473,7 @@ class TestGoodix_27c6_0e00(BaseTest.TestTablet): class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet): - hid_bpfs = [("XPPen__ArtistPro16Gen2.bpf.o", True)] + hid_bpfs = [HidBpf("XPPen__ArtistPro16Gen2.bpf.o", True)] def create_device(self): dev = XPPen_ArtistPro16Gen2_28bd_095b( @@ -1484,7 +1485,7 @@ class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet): class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet): - hid_bpfs = [("XPPen__Artist24.bpf.o", True)] + hid_bpfs = [HidBpf("XPPen__Artist24.bpf.o", True)] def create_device(self): return XPPen_Artist24_28bd_093a( @@ -1495,7 +1496,7 @@ class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet): class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet): - hid_bpfs = [("Huion__Kamvas-Pro-19.bpf.o", True)] + hid_bpfs = [HidBpf("Huion__Kamvas-Pro-19.bpf.o", True)] def create_device(self): return Huion_Kamvas_Pro_19_256c_006b( diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index b62c7dba6777..2d6d04f0ff80 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -40,7 +40,7 @@ import logging logger = logging.getLogger("hidtools.test.wacom") -KERNEL_MODULE = ("wacom", "wacom") +KERNEL_MODULE = base.KernelModule("wacom", "wacom") class ProximityState(Enum): @@ -892,9 +892,9 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest locations. The value of `t` may be incremented over time to move the points along a linear path. """ - return [ self.make_contact(id, t) for id in range(0, n) ] + return [self.make_contact(id, t) for id in range(0, n)] - def assert_contact(self, uhdev, evdev, contact_ids, t=0): + def assert_contact(self, evdev, contact_ids, t=0): """ Assert properties of a contact generated by make_contact. """ @@ -916,12 +916,12 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_X] == x assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_Y] == y - def assert_contacts(self, uhdev, evdev, data, t=0): + def assert_contacts(self, evdev, data, t=0): """ Assert properties of a list of contacts generated by make_contacts. """ for contact_ids in data: - self.assert_contact(uhdev, evdev, contact_ids, t) + self.assert_contact(evdev, contact_ids, t) def test_contact_id_0(self): """ @@ -997,12 +997,16 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events - self.assert_contacts(uhdev, evdev, - [ self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), - self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), - self.ContactIds(contact_id = 2, tracking_id = -1, slot_num = None), - self.ContactIds(contact_id = 3, tracking_id = 1, slot_num = 1), - self.ContactIds(contact_id = 4, tracking_id = -1, slot_num = None) ]) + self.assert_contacts( + evdev, + [ + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + self.ContactIds(contact_id=2, tracking_id=-1, slot_num=None), + self.ContactIds(contact_id=3, tracking_id=1, slot_num=1), + self.ContactIds(contact_id=4, tracking_id=-1, slot_num=None), + ], + ) def confidence_change_assert_playback(self, uhdev, evdev, timeline): """ @@ -1026,8 +1030,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest events = uhdev.next_sync_events() self.debug_reports(r, uhdev, events) - ids = [ x[0] for x in state ] - self.assert_contacts(uhdev, evdev, ids, t) + ids = [x[0] for x in state] + self.assert_contacts(evdev, ids, t) t += 1 @@ -1044,27 +1048,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident - # First finger looses confidence and clears only the tipswitch flag - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the tipswitch flag + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_loss_b(self): """ @@ -1079,27 +1124,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger looses confidence and has both flags cleared simultaneously - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and has both flags cleared simultaneously + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_loss_c(self): """ @@ -1113,27 +1199,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # First finger looses confidence and clears only the confidence flag - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the confidence flag + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_gain_a(self): """ @@ -1144,27 +1271,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident - # Only second finger is confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # First finger gains confidence - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger remains confident - [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger remains confident - [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Only second finger is confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger gains confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [ + ( + self.ContactIds(contact_id=0, tracking_id=1, slot_num=1), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [ + ( + self.ContactIds(contact_id=0, tracking_id=1, slot_num=1), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + ], + ) def test_confidence_gain_b(self): """ @@ -1175,24 +1343,65 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # First and second finger confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # Firtst finger looses confidence - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger gains confidence - [(self.ContactIds(contact_id = 0, tracking_id = 2, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident - # First finger goes up - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # First and second finger confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Firtst finger looses confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger gains confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=2, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger goes up + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1926ef6b40ab..3eebf5e3b974 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -766,19 +766,34 @@ TEST_F(iommufd_ioas, get_hw_info) uint8_t max_pasid = 0; /* Provide a zero-size user_buffer */ - test_cmd_get_hw_info(self->device_id, NULL, 0); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, 0); /* Provide a user_buffer with exact size */ - test_cmd_get_hw_info(self->device_id, &buffer_exact, sizeof(buffer_exact)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact, + sizeof(buffer_exact)); + + /* Request for a wrong data_type, and a correct one */ + test_err_get_hw_info(EOPNOTSUPP, self->device_id, + IOMMU_HW_INFO_TYPE_SELFTEST + 1, + &buffer_exact, sizeof(buffer_exact)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_SELFTEST, &buffer_exact, + sizeof(buffer_exact)); /* * Provide a user_buffer with size larger than the exact size to check if * kernel zero the trailing bytes. */ - test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger, + sizeof(buffer_larger)); /* * Provide a user_buffer with size smaller than the exact size to check if * the fields within the size range still gets updated. */ - test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, + &buffer_smaller, sizeof(buffer_smaller)); test_cmd_get_hw_info_pasid(self->device_id, &max_pasid); ASSERT_EQ(0, max_pasid); if (variant->pasid_capable) { @@ -788,9 +803,11 @@ TEST_F(iommufd_ioas, get_hw_info) } } else { test_err_get_hw_info(ENOENT, self->device_id, - &buffer_exact, sizeof(buffer_exact)); + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact, + sizeof(buffer_exact)); test_err_get_hw_info(ENOENT, self->device_id, - &buffer_larger, sizeof(buffer_larger)); + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger, + sizeof(buffer_larger)); } } @@ -953,6 +970,33 @@ TEST_F(iommufd_ioas, area_auto_iova) test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); } +/* https://lore.kernel.org/r/685af644.a00a0220.2e5631.0094.GAE@google.com */ +TEST_F(iommufd_ioas, reserved_overflow) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved.start = 6, + }; + unsigned int map_len; + __u64 iova; + + if (PAGE_SIZE == 4096) { + test_cmd.add_reserved.length = 0xffffffffffff8001; + map_len = 0x5000; + } else { + test_cmd.add_reserved.length = + 0xffffffffffffffff - MOCK_PAGE_SIZE * 16; + map_len = MOCK_PAGE_SIZE * 10; + } + + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + test_err_ioctl_ioas_map(ENOSPC, buffer, map_len, &iova); +} + TEST_F(iommufd_ioas, area_allowed) { struct iommu_test_cmd test_cmd = { @@ -2193,8 +2237,7 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability) test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id); test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); - test_cmd_get_hw_capabilities(self->idev_id, caps, - IOMMU_HW_CAP_DIRTY_TRACKING); + test_cmd_get_hw_capabilities(self->idev_id, caps); ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING, caps & IOMMU_HW_CAP_DIRTY_TRACKING); @@ -2706,7 +2749,7 @@ FIXTURE_SETUP(iommufd_viommu) /* Allocate a vIOMMU taking refcount of the parent hwpt */ test_cmd_viommu_alloc(self->device_id, self->hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &self->viommu_id); /* Allocate a regular nested hwpt */ @@ -2745,24 +2788,27 @@ TEST_F(iommufd_viommu, viommu_negative_tests) if (self->device_id) { /* Negative test -- invalid hwpt (hwpt_id=0) */ test_err_viommu_alloc(ENOENT, device_id, 0, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); /* Negative test -- not a nesting parent hwpt */ test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id); test_err_viommu_alloc(EINVAL, device_id, hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); test_ioctl_destroy(hwpt_id); /* Negative test -- unsupported viommu type */ test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id, - 0xdead, NULL); + 0xdead, NULL, 0, NULL); EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->hwpt_id)); EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id)); } else { test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); } } @@ -2778,35 +2824,66 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) uint32_t fault_fd; uint32_t vdev_id; - if (self->device_id) { - test_ioctl_fault_alloc(&fault_id, &fault_fd); - test_err_hwpt_alloc_iopf( - ENOENT, dev_id, viommu_id, UINT32_MAX, - IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, - IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_err_hwpt_alloc_iopf( - EOPNOTSUPP, dev_id, viommu_id, fault_id, - IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), &iopf_hwpt_id, - IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_cmd_hwpt_alloc_iopf( - dev_id, viommu_id, fault_id, IOMMU_HWPT_FAULT_ID_VALID, - &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, - sizeof(data)); + if (!dev_id) + SKIP(return, "Skipping test for variant no_viommu"); - /* Must allocate vdevice before attaching to a nested hwpt */ - test_err_mock_domain_replace(ENOENT, self->stdev_id, - iopf_hwpt_id); - test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); - test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); - EXPECT_ERRNO(EBUSY, - _test_ioctl_destroy(self->fd, iopf_hwpt_id)); - test_cmd_trigger_iopf(dev_id, fault_fd); + test_ioctl_fault_alloc(&fault_id, &fault_fd); + test_err_hwpt_alloc_iopf(ENOENT, dev_id, viommu_id, UINT32_MAX, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf(EOPNOTSUPP, dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + test_cmd_hwpt_alloc_iopf(dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); - test_ioctl_destroy(iopf_hwpt_id); - close(fault_fd); - test_ioctl_destroy(fault_id); - } + /* Must allocate vdevice before attaching to a nested hwpt */ + test_err_mock_domain_replace(ENOENT, self->stdev_id, iopf_hwpt_id); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + test_cmd_trigger_iopf(dev_id, fault_fd); + + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); + test_ioctl_destroy(iopf_hwpt_id); + close(fault_fd); + test_ioctl_destroy(fault_id); +} + +TEST_F(iommufd_viommu, viommu_alloc_with_data) +{ + struct iommu_viommu_selftest data = { + .in_data = 0xbeef, + }; + uint32_t *test; + + if (!self->device_id) + SKIP(return, "Skipping test for variant no_viommu"); + + test_cmd_viommu_alloc(self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, &data, sizeof(data), + &self->viommu_id); + ASSERT_EQ(data.out_data, data.in_data); + + /* Negative mmap tests -- offset and length cannot be changed */ + test_err_mmap(ENXIO, data.out_mmap_length, + data.out_mmap_offset + PAGE_SIZE); + test_err_mmap(ENXIO, data.out_mmap_length, + data.out_mmap_offset + PAGE_SIZE * 2); + test_err_mmap(ENXIO, data.out_mmap_length / 2, data.out_mmap_offset); + test_err_mmap(ENXIO, data.out_mmap_length * 2, data.out_mmap_offset); + + /* Now do a correct mmap for a loopback test */ + test = mmap(NULL, data.out_mmap_length, PROT_READ | PROT_WRITE, + MAP_SHARED, self->fd, data.out_mmap_offset); + ASSERT_NE(MAP_FAILED, test); + ASSERT_EQ(data.in_data, *test); + + /* The owner of the mmap region should be blocked */ + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id)); + munmap(test, data.out_mmap_length); } TEST_F(iommufd_viommu, vdevice_alloc) @@ -2867,169 +2944,234 @@ TEST_F(iommufd_viommu, vdevice_cache) uint32_t vdev_id = 0; uint32_t num_inv; - if (dev_id) { - test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); - - test_cmd_dev_check_cache_all(dev_id, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Check data_type by passing zero-length array */ - num_inv = 0; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: Invalid data_type */ - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); + if (!dev_id) + SKIP(return, "Skipping test for variant no_viommu"); + + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + + test_cmd_dev_check_cache_all(dev_id, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Check data_type by passing zero-length array */ + num_inv = 0; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(!num_inv); + + /* Negative test: Invalid data_type */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: structure size sanity */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs) + 1, &num_inv); + assert(!num_inv); + + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 1, + &num_inv); + assert(!num_inv); + + /* Negative test: invalid flag is passed */ + num_inv = 1; + inv_reqs[0].flags = 0xffffffff; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid data_uptr when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, NULL, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid entry_len when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 0, + &num_inv); + assert(!num_inv); + + /* Negative test: invalid cache_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid vdev_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x9; + inv_reqs[0].cache_id = 0; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); - /* Negative test: structure size sanity */ - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs) + 1, &num_inv); - assert(!num_inv); - - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - 1, &num_inv); - assert(!num_inv); - - /* Negative test: invalid flag is passed */ - num_inv = 1; - inv_reqs[0].flags = 0xffffffff; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: invalid data_uptr when array is not empty */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EINVAL, viommu_id, NULL, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: invalid entry_len when array is not empty */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - 0, &num_inv); - assert(!num_inv); - - /* Negative test: invalid cache_id */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid flags configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0xffffffff; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 1; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); - /* Negative test: invalid vdev_id */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x9; - inv_reqs[0].cache_id = 0; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid cache_id configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 2nd cache entry and verify */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 1; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, 0); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 3rd and 4th cache entries and verify */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 2; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 3; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 2); + test_cmd_dev_check_cache_all(dev_id, 0); + + /* Invalidate all cache entries for nested_dev_id[1] and verify */ + num_inv = 1; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache_all(dev_id, 0); + test_ioctl_destroy(vdev_id); +} + +TEST_F(iommufd_viommu, hw_queue) +{ + __u64 iova = MOCK_APERTURE_START, iova2; + uint32_t viommu_id = self->viommu_id; + uint32_t hw_queue_id[2]; + + if (!viommu_id) + SKIP(return, "Skipping test for variant no_viommu"); + + /* Fail IOMMU_HW_QUEUE_TYPE_DEFAULT */ + test_err_hw_queue_alloc(EOPNOTSUPP, viommu_id, + IOMMU_HW_QUEUE_TYPE_DEFAULT, 0, iova, PAGE_SIZE, + &hw_queue_id[0]); + /* Fail queue addr and length */ + test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, 0, &hw_queue_id[0]); + test_err_hw_queue_alloc(EOVERFLOW, viommu_id, + IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, ~(uint64_t)0, + PAGE_SIZE, &hw_queue_id[0]); + /* Fail missing iova */ + test_err_hw_queue_alloc(ENOENT, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, PAGE_SIZE, &hw_queue_id[0]); + + /* Map iova */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + test_ioctl_ioas_map(buffer + PAGE_SIZE, PAGE_SIZE, &iova2); + + /* Fail index=1 and =MAX; must start from index=0 */ + test_err_hw_queue_alloc(EIO, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1, + iova, PAGE_SIZE, &hw_queue_id[0]); + test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + IOMMU_TEST_HW_QUEUE_MAX, iova, PAGE_SIZE, + &hw_queue_id[0]); + + /* Allocate index=0, declare ownership of the iova */ + test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, + iova, PAGE_SIZE, &hw_queue_id[0]); + /* Fail duplicated index */ + test_err_hw_queue_alloc(EEXIST, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, PAGE_SIZE, &hw_queue_id[0]); + /* Fail unmap, due to iova ownership */ + test_err_ioctl_ioas_unmap(EBUSY, iova, PAGE_SIZE); + /* The 2nd page is not pinned, so it can be unmmap */ + test_ioctl_ioas_unmap(iova2, PAGE_SIZE); + + /* Allocate index=1, with an unaligned case */ + test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1, + iova + PAGE_SIZE / 2, PAGE_SIZE / 2, + &hw_queue_id[1]); + /* Fail to destroy, due to dependency */ + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hw_queue_id[0])); + + /* Destroy in descending order */ + test_ioctl_destroy(hw_queue_id[1]); + test_ioctl_destroy(hw_queue_id[0]); + /* Now it can unmap the first page */ + test_ioctl_ioas_unmap(iova, PAGE_SIZE); +} - /* - * Invalidate the 1st cache entry but fail the 2nd request - * due to invalid flags configuration in the 2nd request. - */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 0; - inv_reqs[1].flags = 0xffffffff; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = 1; - test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); +TEST_F(iommufd_viommu, vdevice_tombstone) +{ + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; - /* - * Invalidate the 1st cache entry but fail the 2nd request - * due to invalid cache_id configuration in the 2nd request. - */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 0; - inv_reqs[1].flags = 0; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Invalidate the 2nd cache entry and verify */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 1; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, 0); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Invalidate the 3rd and 4th cache entries and verify */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 2; - inv_reqs[1].flags = 0; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = 3; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 2); - test_cmd_dev_check_cache_all(dev_id, 0); + if (!dev_id) + SKIP(return, "Skipping test for variant no_viommu"); - /* Invalidate all cache entries for nested_dev_id[1] and verify */ - num_inv = 1; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache_all(dev_id, 0); - test_ioctl_destroy(vdev_id); - } + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_ioctl_destroy(self->stdev_id); + EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, vdev_id)); } FIXTURE(iommufd_device_pasid) @@ -3123,8 +3265,7 @@ TEST_F(iommufd_device_pasid, pasid_attach) /* Allocate a regular nested hwpt based on viommu */ test_cmd_viommu_alloc(self->device_id, parent_hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, - &viommu_id); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &viommu_id); test_cmd_hwpt_alloc_nested(self->device_id, viommu_id, IOMMU_HWPT_ALLOC_PASID, &nested_hwpt_id[2], diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index e11ec4b121fc..651fc9f13c08 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -634,6 +634,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) uint32_t idev_id; uint32_t hwpt_id; uint32_t viommu_id; + uint32_t hw_queue_id; uint32_t vdev_id; __u64 iova; @@ -666,8 +667,8 @@ TEST_FAIL_NTH(basic_fail_nth, device) &self->stdev_id, NULL, &idev_id)) return -1; - if (_test_cmd_get_hw_info(self->fd, idev_id, &info, - sizeof(info), NULL, NULL)) + if (_test_cmd_get_hw_info(self->fd, idev_id, IOMMU_HW_INFO_TYPE_DEFAULT, + &info, sizeof(info), NULL, NULL)) return -1; if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, @@ -688,13 +689,19 @@ TEST_FAIL_NTH(basic_fail_nth, device) IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; - if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, 0, &viommu_id)) + if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, 0, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + &viommu_id)) return -1; if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id)) return -1; + if (_test_cmd_hw_queue_alloc(self->fd, viommu_id, + IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, iova, + PAGE_SIZE, &hw_queue_id)) + return -1; + if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd)) return -1; close(fault_fd); diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 6e967b58acfd..3c3e08b8c90e 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -56,6 +56,10 @@ static unsigned long PAGE_SIZE; #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +#define test_err_mmap(_errno, length, offset) \ + EXPECT_ERRNO(_errno, (long)mmap(NULL, length, PROT_READ | PROT_WRITE, \ + MAP_SHARED, self->fd, offset)) + static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; @@ -762,20 +766,24 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) #endif /* @data can be NULL */ -static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, - size_t data_len, uint32_t *capabilities, - uint8_t *max_pasid) +static int _test_cmd_get_hw_info(int fd, __u32 device_id, __u32 data_type, + void *data, size_t data_len, + uint32_t *capabilities, uint8_t *max_pasid) { struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; struct iommu_hw_info cmd = { .size = sizeof(cmd), .dev_id = device_id, .data_len = data_len, + .in_data_type = data_type, .data_uptr = (uint64_t)data, .out_capabilities = 0, }; int ret; + if (data_type != IOMMU_HW_INFO_TYPE_DEFAULT) + cmd.flags |= IOMMU_HW_INFO_FLAG_INPUT_TYPE; + ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd); if (ret) return ret; @@ -818,20 +826,23 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, return 0; } -#define test_cmd_get_hw_info(device_id, data, data_len) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL, NULL)) +#define test_cmd_get_hw_info(device_id, data_type, data, data_len) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data_type, \ + data, data_len, NULL, NULL)) -#define test_err_get_hw_info(_errno, device_id, data, data_len) \ - EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL, NULL)) +#define test_err_get_hw_info(_errno, device_id, data_type, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_get_hw_info(self->fd, device_id, data_type, \ + data, data_len, NULL, NULL)) -#define test_cmd_get_hw_capabilities(device_id, caps, mask) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ +#define test_cmd_get_hw_capabilities(device_id, caps) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \ 0, &caps, NULL)) -#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ +#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \ 0, NULL, max_pasid)) static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) @@ -902,7 +913,8 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid, pasid, fault_fd)) static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, - __u32 type, __u32 flags, __u32 *viommu_id) + __u32 flags, __u32 type, void *data, + __u32 data_len, __u32 *viommu_id) { struct iommu_viommu_alloc cmd = { .size = sizeof(cmd), @@ -910,6 +922,8 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, .type = type, .dev_id = device_id, .hwpt_id = hwpt_id, + .data_uptr = (uint64_t)data, + .data_len = data_len, }; int ret; @@ -921,13 +935,15 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, return 0; } -#define test_cmd_viommu_alloc(device_id, hwpt_id, type, viommu_id) \ - ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ - type, 0, viommu_id)) -#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, viommu_id) \ - EXPECT_ERRNO(_errno, \ - _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ - type, 0, viommu_id)) +#define test_cmd_viommu_alloc(device_id, hwpt_id, type, data, data_len, \ + viommu_id) \ + ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \ + type, data, data_len, viommu_id)) +#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, data, \ + data_len, viommu_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \ + type, data, data_len, viommu_id)) static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, __u64 virt_id, __u32 *vdev_id) @@ -956,6 +972,37 @@ static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ virt_id, vdev_id)) +static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type, + __u32 idx, __u64 base_addr, __u64 length, + __u32 *hw_queue_id) +{ + struct iommu_hw_queue_alloc cmd = { + .size = sizeof(cmd), + .viommu_id = viommu_id, + .type = type, + .index = idx, + .nesting_parent_iova = base_addr, + .length = length, + }; + int ret; + + ret = ioctl(fd, IOMMU_HW_QUEUE_ALLOC, &cmd); + if (ret) + return ret; + if (hw_queue_id) + *hw_queue_id = cmd.out_hw_queue_id; + return 0; +} + +#define test_cmd_hw_queue_alloc(viommu_id, type, idx, base_addr, len, out_qid) \ + ASSERT_EQ(0, _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \ + base_addr, len, out_qid)) +#define test_err_hw_queue_alloc(_errno, viommu_id, type, idx, base_addr, len, \ + out_qid) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \ + base_addr, len, out_qid)) + static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type, __u32 *veventq_id, __u32 *veventq_fd) { diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 824266982aa3..f2dafa0b700b 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -38,9 +38,6 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests memfd_secret -hugetlb_dio -pkey_sighandler_tests_32 -pkey_sighandler_tests_64 soft-dirty split_huge_page_test ksm_tests diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index dbbcc5eb3dce..d30625c18259 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -72,31 +72,6 @@ static int detect_thp_sizes(size_t sizes[], int max) return count; } -static void detect_huge_zeropage(void) -{ - int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", - O_RDONLY); - size_t enabled = 0; - char buf[15]; - int ret; - - if (fd < 0) - return; - - ret = pread(fd, buf, sizeof(buf), 0); - if (ret > 0 && ret < sizeof(buf)) { - buf[ret] = 0; - - enabled = strtoul(buf, NULL, 10); - if (enabled == 1) { - has_huge_zeropage = true; - ksft_print_msg("[INFO] huge zeropage is enabled\n"); - } - } - - close(fd); -} - static bool range_is_swapped(void *addr, size_t size) { for (; size; addr += pagesize, size -= pagesize) @@ -113,11 +88,11 @@ struct comm_pipes { static int setup_comm_pipes(struct comm_pipes *comm_pipes) { if (pipe(comm_pipes->child_ready) < 0) { - ksft_perror("pipe()"); + ksft_perror("pipe() failed"); return -errno; } if (pipe(comm_pipes->parent_ready) < 0) { - ksft_perror("pipe()"); + ksft_perror("pipe() failed"); close(comm_pipes->child_ready[0]); close(comm_pipes->child_ready[1]); return -errno; @@ -268,8 +243,10 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_XFAIL); } else { + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_FAIL); } close_comm_pipes: @@ -332,7 +309,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, if (before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_print_msg("vmsplice() failed\n"); + ksft_perror("vmsplice() failed\n"); log_test_result(KSFT_FAIL); goto close_pipe; } @@ -397,8 +374,10 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ + ksft_print_msg("Leak from child into parent\n"); log_test_result(KSFT_XFAIL); } else { + ksft_print_msg("Leak from child into parent\n"); log_test_result(KSFT_FAIL); } close_pipe: @@ -562,7 +541,7 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) while (total < size) { cur = pread(fd, tmp + total, size - total, total); if (cur < 0) { - ksft_print_msg("pread() failed\n"); + ksft_perror("pread() failed\n"); log_test_result(KSFT_FAIL); goto quit_child; } @@ -570,10 +549,12 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) } /* Finally, check if we read what we expected. */ - if (!memcmp(mem, tmp, size)) + if (!memcmp(mem, tmp, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Longtom R/W pin is not reliable\n"); log_test_result(KSFT_FAIL); + } quit_child: if (use_fork) { @@ -628,7 +609,7 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, tmp = malloc(size); if (!tmp) { - ksft_print_msg("malloc() failed\n"); + ksft_perror("malloc() failed\n"); log_test_result(KSFT_FAIL); return; } @@ -725,10 +706,12 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, ksft_perror("PIN_LONGTERM_TEST_READ failed"); log_test_result(KSFT_FAIL); } else { - if (!memcmp(mem, tmp, size)) + if (!memcmp(mem, tmp, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Longterm R/O pin is not reliable\n"); log_test_result(KSFT_FAIL); + } } ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); @@ -1417,10 +1400,12 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, else ret = -EINVAL; - if (!ret) + if (!ret) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_FAIL); + } close_comm_pipes: close_comm_pipes(&comm_pipes); } @@ -1528,10 +1513,12 @@ static void test_cow(char *mem, const char *smem, size_t size) memset(mem, 0xff, size); /* See if we still read the old values via the other mapping. */ - if (!memcmp(smem, old, size)) + if (!memcmp(smem, old, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Other mapping modified\n"); log_test_result(KSFT_FAIL); + } free(old); } @@ -1547,7 +1534,7 @@ static void test_ro_fast_pin(char *mem, const char *smem, size_t size) static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; log_test_start("%s ... with shared zeropage", desc); @@ -1567,8 +1554,8 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) } /* Read from the page to populate the shared zeropage. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1579,7 +1566,7 @@ munmap: static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, *mmap_mem, *mmap_smem, tmp; + char *mem, *smem, *mmap_mem, *mmap_smem; size_t mmap_size; int ret; @@ -1613,13 +1600,13 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); ret = madvise(mem, pmdsize, MADV_HUGEPAGE); - if (ret != 0) { + if (ret) { ksft_perror("madvise()"); log_test_result(KSFT_FAIL); goto munmap; } - ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); - if (ret != 0) { + ret = madvise(smem, pmdsize, MADV_HUGEPAGE); + if (ret) { ksft_perror("madvise()"); log_test_result(KSFT_FAIL); goto munmap; @@ -1630,8 +1617,8 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) * the first sub-page and test if we get another sub-page populated * automatically. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || !pagemap_is_populated(pagemap_fd, smem + pagesize)) { ksft_test_result_skip("Did not get THPs populated\n"); @@ -1647,7 +1634,7 @@ munmap: static void run_with_memfd(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; int fd; log_test_start("%s ... with memfd", desc); @@ -1681,8 +1668,8 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) } /* Fault the page in. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1695,7 +1682,7 @@ close: static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; FILE *file; int fd; @@ -1737,8 +1724,8 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) } /* Fault the page in. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1753,7 +1740,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, size_t hugetlbsize) { int flags = MFD_HUGETLB; - char *mem, *smem, tmp; + char *mem, *smem; int fd; log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, @@ -1791,8 +1778,8 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, } /* Fault the page in. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, hugetlbsize); munmap: @@ -1891,7 +1878,7 @@ int main(int argc, char **argv) } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); - detect_huge_zeropage(); + has_huge_zeropage = detect_huge_zeropage(); ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 93af3d3760f9..b0d42eb04e3a 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -36,13 +36,6 @@ static volatile sig_atomic_t signal_jump_set; static sigjmp_buf signal_jmp_buf; /* - * Ignore the checkpatch warning, we must read from x but don't want to do - * anything with it in order to trigger a read page fault. We therefore must use - * volatile to stop the compiler from optimising this away. - */ -#define FORCE_READ(x) (*(volatile typeof(x) *)x) - -/* * How is the test backing the mapping being tested? */ enum backing_type { @@ -582,7 +575,7 @@ TEST_F(guard_regions, process_madvise) /* OK we don't have permission to do this, skip. */ if (count == -1 && errno == EPERM) - ksft_exit_skip("No process_madvise() permissions, try running as root.\n"); + SKIP(return, "No process_madvise() permissions, try running as root.\n"); /* Returns the number of bytes advised. */ ASSERT_EQ(count, 6 * page_size); diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 29047d2e0c49..268dadb8ce43 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -114,7 +114,15 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) } if (fallocate(fd, 0, 0, size)) { - if (size == pagesize) { + /* + * Some filesystems (eg, NFSv3) don't support + * fallocate(), report this as a skip rather than a + * test failure. + */ + if (errno == EOPNOTSUPP) { + ksft_print_msg("fallocate() not supported by filesystem\n"); + result = KSFT_SKIP; + } else if (size == pagesize) { ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); result = KSFT_FAIL; } else { diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index e74107185324..1afe14b9dc0c 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -47,14 +47,11 @@ void write_fault_pages(void *addr, unsigned long nr_pages) void read_fault_pages(void *addr, unsigned long nr_pages) { - volatile unsigned long dummy = 0; unsigned long i; for (i = 0; i < nr_pages; i++) { - dummy += *((unsigned long *)(addr + (i * huge_page_size))); - /* Prevent the compiler from optimizing out the entire loop: */ - asm volatile("" : "+r" (dummy)); + FORCE_READ(((unsigned long *)(addr + (i * huge_page_size)))); } } diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 8a4d34cce36b..a18c50d51141 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -561,8 +561,6 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages, usleep(TICK); } - madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE); - return timeout == -1; } @@ -1190,6 +1188,11 @@ int main(int argc, char **argv) .read_ahead_kb = 0, }; + if (!thp_is_enabled()) { + printf("Transparent Hugepages not available\n"); + return KSFT_SKIP; + } + parse_test_type(argc, argv); setbuf(stdout, NULL); diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index b61803e36d1c..d8bd1911dfc0 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -393,9 +393,13 @@ static void test_unmerge_uffd_wp(void) /* See if UFFD-WP is around. */ uffdio_api.api = UFFD_API; - uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + uffdio_api.features = 0; if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { - ksft_test_result_fail("UFFDIO_API failed\n"); + if (errno == EINVAL) + ksft_test_result_skip("The API version requested is not supported\n"); + else + ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno)); + goto close_uffd; } if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { @@ -403,6 +407,26 @@ static void test_unmerge_uffd_wp(void) goto close_uffd; } + /* + * UFFDIO_API must only be called once to enable features. + * So we close the old userfaultfd and create a new one to + * actually enable UFFD_FEATURE_PAGEFAULT_FLAG_WP. + */ + close(uffd); + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + ksft_test_result_fail("__NR_userfaultfd failed\n"); + goto unmap; + } + + /* Now, enable it ("two-step handshake") */ + uffdio_api.api = UFFD_API; + uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { + ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno)); + goto close_uffd; + } + /* Register UFFD-WP, no need for an actual handler. */ if (uffd_register(uffd, map, size, false, true, false)) { ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n"); diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index e80deac1436b..b77462b5c240 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -15,6 +15,7 @@ #include "../kselftest.h" #include <include/vdso/time64.h> #include "vm_util.h" +#include "thp_settings.h" #define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/" #define KSM_FP(s) (KSM_SYSFS_PATH s) @@ -527,6 +528,11 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot, unsigned long scan_time_ns; int pagemap_fd, n_normal_pages, n_huge_pages; + if (!thp_is_enabled()) { + printf("Transparent Hugepages not available\n"); + return KSFT_SKIP; + } + map_size *= MB; size_t len = map_size; diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index cc26480098ae..cc4253f47f10 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -2,15 +2,18 @@ #define _GNU_SOURCE #include "../kselftest_harness.h" +#include <linux/prctl.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <sys/syscall.h> #include <sys/wait.h> #include <linux/perf_event.h> #include "vm_util.h" +#include <linux/mman.h> FIXTURE(merge) { @@ -23,7 +26,7 @@ FIXTURE_SETUP(merge) { self->page_size = psize(); /* Carve out PROT_NONE region to map over. */ - self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE, + self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); ASSERT_NE(self->carveout, MAP_FAILED); /* Setup PROCMAP_QUERY interface. */ @@ -32,8 +35,13 @@ FIXTURE_SETUP(merge) FIXTURE_TEARDOWN(merge) { - ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); + ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0); ASSERT_EQ(close_procmap(&self->procmap), 0); + /* + * Clear unconditionally, as some tests set this. It is no issue if this + * fails (KSM may be disabled for instance). + */ + prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0); } TEST_F(merge, mprotect_unfaulted_left) @@ -498,4 +506,669 @@ out: remove(probe_file); } +TEST_F(merge, ksm_merge) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + int err; + + /* + * Map two R/W immediately adjacent to one another, they should + * trivially merge: + * + * |-----------|-----------| + * | R/W | R/W | + * |-----------|-----------| + * ptr ptr2 + */ + + ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); + + /* Unmap the second half of this merged VMA. */ + ASSERT_EQ(munmap(ptr2, page_size), 0); + + /* OK, now enable global KSM merge. We clear this on test teardown. */ + err = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (err == -1) { + int errnum = errno; + + /* Only non-failure case... */ + ASSERT_EQ(errnum, EINVAL); + /* ...but indicates we should skip. */ + SKIP(return, "KSM memory merging not supported, skipping."); + } + + /* + * Now map a VMA adjacent to the existing that was just made + * VM_MERGEABLE, this should merge as well. + */ + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); + + /* Now this VMA altogether. */ + ASSERT_EQ(munmap(ptr, 2 * page_size), 0); + + /* Try the same operation as before, asserting this also merges fine. */ + ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); +} + +TEST_F(merge, mremap_unfaulted_to_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map two distinct areas: + * + * |-----------| |-----------| + * | unfaulted | | unfaulted | + * |-----------| |-----------| + * ptr ptr2 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ + * |-----------| / |-----------| + * | faulted | \ | unfaulted | + * |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr[0] = 'x'; + + /* + * Now move ptr2 adjacent to ptr: + * + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + * + * It should merge: + * + * |----------------------| + * | faulted | + * |----------------------| + * ptr + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mremap_unfaulted_behind_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map two distinct areas: + * + * |-----------| |-----------| + * | unfaulted | | unfaulted | + * |-----------| |-----------| + * ptr ptr2 + */ + ptr = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ + * |-----------| / |-----------| + * | faulted | \ | unfaulted | + * |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr[0] = 'x'; + + /* + * Now move ptr2 adjacent, but behind, ptr: + * + * |-----------|-----------| + * | unfaulted | faulted | + * |-----------|-----------| + * ptr2 ptr + * + * It should merge: + * + * |----------------------| + * | faulted | + * |----------------------| + * ptr2 + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map three distinct areas: + * + * |-----------| |-----------| |-----------| + * | unfaulted | | unfaulted | | unfaulted | + * |-----------| |-----------| |-----------| + * ptr ptr2 ptr3 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr3 further away. */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr, ptr3: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | unfaulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr[0] = 'x'; + ptr3[0] = 'x'; + + /* + * Move ptr3 back into place, leaving a place for ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge, but only ptr, ptr2: + * + * |-----------------------|-----------| + * | faulted | unfaulted | + * |-----------------------|-----------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr3)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr3); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr3 + 5 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_faulted_unfaulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map three distinct areas: + * + * |-----------| |-----------| |-----------| + * | unfaulted | | unfaulted | | unfaulted | + * |-----------| |-----------| |-----------| + * ptr ptr2 ptr3 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr3 further away. */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | unfaulted | \ | unfaulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr[0] = 'x'; + + /* + * Move ptr3 back into place, leaving a place for ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | unfaulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | unfaulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_correctly_placed_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map one larger area: + * + * |-----------------------------------| + * | unfaulted | + * |-----------------------------------| + */ + ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in ptr: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr[0] = 'x'; + + /* + * Unmap middle: + * + * |-----------| |-----------| + * | faulted | | faulted | + * |-----------| |-----------| + * + * Now the faulted areas are compatible with each other (anon_vma the + * same, vma->vm_pgoff equal to virtual page offset). + */ + ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); + + /* + * Map a new area, ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr2 = mmap(&carveout[20 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + +TEST_F(merge, mremap_correct_placed_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map one larger area: + * + * |-----------------------------------| + * | unfaulted | + * |-----------------------------------| + */ + ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in ptr: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr[0] = 'x'; + + /* + * Offset the final and middle 5 pages further away: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr3 = &ptr[10 * page_size]; + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + ptr2 = &ptr[5 * page_size]; + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Move ptr2 into its correct place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr ptr2 \ ptr3 + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + /* + * Now move ptr out of place: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr2 \ ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Now move ptr back into place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr ptr2 \ ptr3 + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + /* + * Now move ptr out of place again: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr2 \ ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Now move ptr3 back into place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr2 ptr3 \ ptr + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr2 \ ptr + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr2[5 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size); + + /* + * Now move ptr back into place: + * + * |-----------|-----------------------| + * | faulted | faulted | + * |-----------|-----------------------| + * ptr ptr2 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); + + /* + * Now move ptr2 out of the way: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | faulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Now move it back: + * + * |-----------|-----------|-----------| + * | faulted | faulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); + + /* + * Move ptr3 out of place: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 1000); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Now move it back: + * + * |-----------|-----------|-----------| + * | faulted | faulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 1e3a595fbf01..c5a73617796a 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -5,6 +5,8 @@ */ #include "../kselftest_harness.h" +#include "thp_settings.h" + #include <strings.h> #include <pthread.h> #include <numa.h> @@ -14,6 +16,7 @@ #include <sys/types.h> #include <signal.h> #include <time.h> +#include "vm_util.h" #define TWOMEG (2<<20) #define RUNTIME (20) @@ -101,15 +104,13 @@ int migrate(uint64_t *ptr, int n1, int n2) void *access_mem(void *ptr) { - volatile uint64_t y = 0; - volatile uint64_t *x = ptr; - while (1) { pthread_testcancel(); - y += *x; - - /* Prevent the compiler from optimizing out the writes to y: */ - asm volatile("" : "+r" (y)); + /* Force a read from the memory pointed to by ptr. This ensures + * the memory access actually happens and prevents the compiler + * from optimizing away this entire loop. + */ + FORCE_READ((uint64_t *)ptr); } return NULL; @@ -185,6 +186,9 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME) uint64_t *ptr; int i; + if (!thp_is_enabled()) + SKIP(return, "Transparent Hugepages not available"); + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) SKIP(return, "Not enough threads or NUMA nodes available"); @@ -214,6 +218,9 @@ TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME) uint64_t *ptr; int i; + if (!thp_is_enabled()) + SKIP(return, "Transparent Hugepages not available"); + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) SKIP(return, "Not enough threads or NUMA nodes available"); diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index bb84476a177f..fccf9e797a0c 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -380,6 +380,359 @@ out: ksft_test_result_fail("%s\n", test_name); } +static bool is_multiple_vma_range_ok(unsigned int pattern_seed, + char *ptr, unsigned long page_size) +{ + int i; + + srand(pattern_seed); + for (i = 0; i <= 10; i += 2) { + int j; + char *buf = &ptr[i * page_size]; + size_t size = i == 4 ? 2 * page_size : page_size; + + for (j = 0; j < size; j++) { + char chr = rand(); + + if (chr != buf[j]) { + ksft_print_msg("page %d offset %d corrupted, expected %d got %d\n", + i, j, chr, buf[j]); + return false; + } + } + } + + return true; +} + +static void mremap_move_multiple_vmas(unsigned int pattern_seed, + unsigned long page_size, + bool dont_unmap) +{ + int mremap_flags = MREMAP_FIXED | MREMAP_MAYMOVE; + char *test_name = "mremap move multiple vmas"; + const size_t size = 11 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int i; + + if (dont_unmap) + mremap_flags |= MREMAP_DONTUNMAP; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, 2 * size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 5 6 8 10 offset in buffer + * |*| |*| |*****| |*| |*| + * |*| |*| |*****| |*| |*| + * 0 1 2 3 4 5 6 pattern offset + */ + for (i = 1; i < 10; i += 2) { + if (i == 5) + continue; + + if (munmap(&ptr[i * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + srand(pattern_seed); + + /* Set up random patterns. */ + for (i = 0; i <= 10; i += 2) { + int j; + size_t size = i == 4 ? 2 * page_size : page_size; + char *buf = &ptr[i * page_size]; + + for (j = 0; j < size; j++) + buf[j] = rand(); + } + + /* First, just move the whole thing. */ + if (mremap(ptr, size, size, mremap_flags, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + /* Check move was ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, tgt_ptr, page_size)) { + success = false; + goto out_unmap; + } + + /* Move next to itself. */ + if (mremap(tgt_ptr, size, size, mremap_flags, + &tgt_ptr[size]) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + /* Check that the move is ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, &tgt_ptr[size], page_size)) { + success = false; + goto out_unmap; + } + + /* Map a range to overwrite. */ + if (mmap(tgt_ptr, size, PROT_NONE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED) { + perror("mmap tgt"); + success = false; + goto out_unmap; + } + /* Move and overwrite. */ + if (mremap(&tgt_ptr[size], size, size, + mremap_flags, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + /* Check that the move is ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, tgt_ptr, page_size)) { + success = false; + goto out_unmap; + } + +out_unmap: + if (munmap(tgt_ptr, 2 * size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); + +out: + if (success) + ksft_test_result_pass("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); + else + ksft_test_result_fail("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); +} + +static void mremap_shrink_multiple_vmas(unsigned long page_size, + bool inplace) +{ + char *test_name = "mremap shrink multiple vmas"; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + void *res; + int i; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 6 8 10 offset in buffer + * |*| |*| |*| |*| |*| |*| + * |*| |*| |*| |*| |*| |*| + */ + for (i = 1; i < 10; i += 2) { + if (munmap(&ptr[i * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + /* + * Shrink in-place across multiple VMAs and gaps so we end up with: + * + * 0 + * |*| + * |*| + */ + if (inplace) + res = mremap(ptr, size, page_size, 0); + else + res = mremap(ptr, size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + tgt_ptr); + + if (res == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out: + if (success) + ksft_test_result_pass("%s%s\n", test_name, + inplace ? " [inplace]" : ""); + else + ksft_test_result_fail("%s%s\n", test_name, + inplace ? " [inplace]" : ""); +} + +static void mremap_move_multiple_vmas_split(unsigned int pattern_seed, + unsigned long page_size, + bool dont_unmap) +{ + char *test_name = "mremap move multiple vmas split"; + int mremap_flags = MREMAP_FIXED | MREMAP_MAYMOVE; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int i; + + if (dont_unmap) + mremap_flags |= MREMAP_DONTUNMAP; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 1 2 3 4 5 6 7 8 9 10 offset in buffer + * |**********| |*******| + * |**********| |*******| + * 0 1 2 3 4 5 6 7 8 9 pattern offset + */ + if (munmap(&ptr[5 * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* Set up random patterns. */ + srand(pattern_seed); + for (i = 0; i < 10; i++) { + int j; + char *buf = &ptr[i * page_size]; + + if (i == 5) + continue; + + for (j = 0; j < page_size; j++) + buf[j] = rand(); + } + + /* + * Move the below: + * + * <-------------> + * 0 1 2 3 4 5 6 7 8 9 10 offset in buffer + * |**********| |*******| + * |**********| |*******| + * 0 1 2 3 4 5 6 7 8 9 pattern offset + * + * Into: + * + * 0 1 2 3 4 5 6 7 offset in buffer + * |*****| |*****| + * |*****| |*****| + * 2 3 4 5 6 7 pattern offset + */ + if (mremap(&ptr[2 * page_size], size - 3 * page_size, size - 3 * page_size, + mremap_flags, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + + /* Offset into random pattern. */ + srand(pattern_seed); + for (i = 0; i < 2 * page_size; i++) + rand(); + + /* Check pattern. */ + for (i = 0; i < 7; i++) { + int j; + char *buf = &tgt_ptr[i * page_size]; + + if (i == 3) + continue; + + for (j = 0; j < page_size; j++) { + char chr = rand(); + + if (chr != buf[j]) { + ksft_print_msg("page %d offset %d corrupted, expected %d got %d\n", + i, j, chr, buf[j]); + goto out_unmap; + } + } + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out: + if (success) + ksft_test_result_pass("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); + else + ksft_test_result_fail("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); +} + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -721,7 +1074,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 2; + int num_misc_tests = 8; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -848,6 +1201,12 @@ int main(int argc, char **argv) mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); + mremap_shrink_multiple_vmas(page_size, /* inplace= */true); + mremap_shrink_multiple_vmas(page_size, /* inplace= */false); + mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ false); + mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); + mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false); + mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true); if (run_perf_tests) { ksft_print_msg("\n%s\n", diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index b07acc86f4f0..0d4209eef0c3 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + #define _GNU_SOURCE #include <stdio.h> #include <fcntl.h> @@ -34,8 +35,8 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -unsigned long page_size; -unsigned int hpage_size; +size_t page_size; +size_t hpage_size; const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -1480,6 +1481,66 @@ static void transact_test(int page_size) extra_thread_faults); } +void zeropfn_tests(void) +{ + unsigned long long mem_size; + struct page_region vec; + int i, ret; + char *mmap_mem, *mem; + + /* Test with normal memory */ + mem_size = 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* Touch each page to ensure it's mapped */ + for (i = 0; i < mem_size; i += page_size) + (void)((volatile char *)mem)[i]; + + ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0, + (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size), + "%s all pages must have PFNZERO set\n", __func__); + + munmap(mem, mem_size); + + /* Test with huge page if user_zero_page is set to 1 */ + if (!detect_huge_zeropage()) { + ksft_test_result_skip("%s use_zero_page not supported or set to 1\n", __func__); + return; + } + + mem_size = 2 * hpage_size; + mmap_mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + hpage_size) & ~(hpage_size - 1)); + + ret = madvise(mem, hpage_size, MADV_HUGEPAGE); + if (!ret) { + FORCE_READ(mem); + + ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0, + 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (hpage_size / page_size), + "%s all huge pages must have PFNZERO set\n", __func__); + } else { + ksft_test_result_skip("%s huge page not supported\n", __func__); + } + + munmap(mmap_mem, mem_size); +} + int main(int __attribute__((unused)) argc, char *argv[]) { int shmid, buf_size, fd, i, ret; @@ -1494,7 +1555,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) if (init_uffd()) ksft_exit_pass(); - ksft_set_plan(115); + ksft_set_plan(117); page_size = getpagesize(); hpage_size = read_pmd_pagesize(); @@ -1669,6 +1730,9 @@ int main(int __attribute__((unused)) argc, char *argv[]) /* 16. Userfaultfd tests */ userfaultfd_tests(); + /* 17. ZEROPFN tests */ + zeropfn_tests(); + close(pagemap_fd); ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index dddd1dd8af14..a38c984103ce 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -429,7 +429,9 @@ CATEGORY="vma_merge" run_test ./merge if [ -x ./memfd_secret ] then -(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +if [ -f /proc/sys/kernel/yama/ptrace_scope ]; then + (echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +fi CATEGORY="memfd_secret" run_test ./memfd_secret fi @@ -483,6 +485,10 @@ CATEGORY="thp" run_test ./khugepaged CATEGORY="thp" run_test ./khugepaged -s 2 +CATEGORY="thp" run_test ./khugepaged all:shmem + +CATEGORY="thp" run_test ./khugepaged -s 4 all:shmem + CATEGORY="thp" run_test ./transhuge-stress -d 20 # Try to create XFS if not provided diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index 8e1462ce0532..8a3f2b4b2186 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -6,8 +6,10 @@ #include <stdint.h> #include <malloc.h> #include <sys/mman.h> + #include "../kselftest.h" #include "vm_util.h" +#include "thp_settings.h" #define PAGEMAP_FILE_PATH "/proc/self/pagemap" #define TEST_ITERATIONS 10000 @@ -78,8 +80,13 @@ static void test_hugepage(int pagemap_fd, int pagesize) { char *map; int i, ret; - size_t hpage_len = read_pmd_pagesize(); + if (!thp_is_enabled()) { + ksft_test_result_skip("Transparent Hugepages not available\n"); + return; + } + + size_t hpage_len = read_pmd_pagesize(); if (!hpage_len) ksft_exit_fail_msg("Reading PMD pagesize failed"); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index f0d9c035641d..05de1fc0005b 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -399,7 +399,6 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr) { size_t i; - int dummy = 0; unsigned char buf[1024]; srand(time(NULL)); @@ -441,8 +440,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, madvise(*addr, fd_size, MADV_HUGEPAGE); for (size_t i = 0; i < fd_size; i++) - dummy += *(*addr + i); - asm volatile("" : "+r" (dummy)); + FORCE_READ((*addr + i)); if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index ad872af1c81a..bad60ac52874 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -381,3 +381,14 @@ unsigned long thp_shmem_supported_orders(void) { return __thp_supported_orders(true); } + +bool thp_is_enabled(void) +{ + if (access(THP_SYSFS, F_OK) != 0) + return false; + + int mode = thp_read_string("enabled", thp_enabled_strings); + + /* THP is considered enabled if it's either "always" or "madvise" */ + return mode == 1 || mode == 3; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index fc131d23d593..6c07f70beee9 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -84,4 +84,6 @@ void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); unsigned long thp_shmem_supported_orders(void); +bool thp_is_enabled(void); + #endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 95b6f043a3cb..8e2b08dc5762 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -77,40 +77,20 @@ void show(unsigned long ps) system(buf); } -unsigned long thuge_read_sysfs(int warn, char *fmt, ...) +unsigned long read_free(unsigned long ps) { - char *line = NULL; - size_t linelen = 0; - char buf[100]; - FILE *f; - va_list ap; unsigned long val = 0; + char buf[100]; - va_start(ap, fmt); - vsnprintf(buf, sizeof buf, fmt, ap); - va_end(ap); + snprintf(buf, sizeof(buf), + "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); + if (read_sysfs(buf, &val) && ps != getpagesize()) + ksft_print_msg("missing %s\n", buf); - f = fopen(buf, "r"); - if (!f) { - if (warn) - ksft_print_msg("missing %s\n", buf); - return 0; - } - if (getline(&line, &linelen, f) > 0) { - sscanf(line, "%lu", &val); - } - fclose(f); - free(line); return val; } -unsigned long read_free(unsigned long ps) -{ - return thuge_read_sysfs(ps != getpagesize(), - "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", - ps >> 10); -} - void test_mmap(unsigned long size, unsigned flags) { char *map; @@ -173,6 +153,7 @@ void test_shmget(unsigned long size, unsigned flags) void find_pagesizes(void) { unsigned long largest = getpagesize(); + unsigned long shmmax_val = 0; int i; glob_t g; @@ -195,7 +176,8 @@ void find_pagesizes(void) } globfree(&g); - if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) + read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val); + if (shmmax_val < NUM_PAGES * largest) ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index c73fd5d455c8..50501b38e34e 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1027,6 +1027,9 @@ static void uffd_poison_handle_fault( do_uffdio_poison(uffd, offset); } +/* Make sure to cover odd/even, and minimum duplications */ +#define UFFD_POISON_TEST_NPAGES 4 + static void uffd_poison_test(uffd_test_args_t *targs) { pthread_t uffd_mon; @@ -1034,12 +1037,17 @@ static void uffd_poison_test(uffd_test_args_t *targs) struct uffd_args args = { 0 }; struct sigaction act = { 0 }; unsigned long nr_sigbus = 0; - unsigned long nr; + unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; + + if (nr_pages < poison_pages) { + uffd_test_skip("Too few pages for POISON test"); + return; + } fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - uffd_register_poison(uffd, area_dst, nr_pages * page_size); - memset(area_src, 0, nr_pages * page_size); + uffd_register_poison(uffd, area_dst, poison_pages * page_size); + memset(area_src, 0, poison_pages * page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -1051,7 +1059,7 @@ static void uffd_poison_test(uffd_test_args_t *targs) if (sigaction(SIGBUS, &act, 0)) err("sigaction"); - for (nr = 0; nr < nr_pages; ++nr) { + for (nr = 0; nr < poison_pages; ++nr) { unsigned long offset = nr * page_size; const char *bytes = (const char *) area_dst + offset; const char *i; @@ -1078,9 +1086,9 @@ static void uffd_poison_test(uffd_test_args_t *targs) if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); - if (nr_sigbus != nr_pages / 2) + if (nr_sigbus != poison_pages / 2) err("expected to receive %lu SIGBUS, actually received %lu", - nr_pages / 2, nr_sigbus); + poison_pages / 2, nr_sigbus); uffd_test_pass(); } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 5492e3f784df..9dafa7669ef9 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -524,3 +524,34 @@ int read_sysfs(const char *file_path, unsigned long *val) return 0; } + +void *sys_mremap(void *old_address, unsigned long old_size, + unsigned long new_size, int flags, void *new_address) +{ + return (void *)syscall(__NR_mremap, (unsigned long)old_address, + old_size, new_size, flags, + (unsigned long)new_address); +} + +bool detect_huge_zeropage(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", + O_RDONLY); + bool enabled = 0; + char buf[15]; + int ret; + + if (fd < 0) + return 0; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + if (strtoul(buf, NULL, 10) == 1) + enabled = 1; + } + + close(fd); + return enabled; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b8136d12a0f8..c20298ae98ea 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -18,6 +18,13 @@ #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) +/* + * Ignore the checkpatch warning, we must read from x but don't want to do + * anything with it in order to trigger a read page fault. We therefore must use + * volatile to stop the compiler from optimising this away. + */ +#define FORCE_READ(x) (*(volatile typeof(x) *)x) + extern unsigned int __page_size; extern unsigned int __page_shift; @@ -44,6 +51,8 @@ static inline unsigned int pshift(void) return __page_shift; } +bool detect_huge_zeropage(void); + /* * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with * ENOENT on unlinked files. See @@ -117,6 +126,9 @@ static inline void log_test_result(int result) ksft_test_result_report(result, "%s\n", test_name); } +void *sys_mremap(void *old_address, unsigned long old_size, + unsigned long new_size, int flags, void *new_address); + /* * On ppc64 this will only work with radix 2M hugepage size */ diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 973968f45bba..19bb333e2485 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -5,6 +5,7 @@ /proc-2-is-kthread /proc-fsconfig-hidepid /proc-loadavg-001 +/proc-maps-race /proc-multiple-procfs /proc-empty-vm /proc-pid-vm diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index b12921b9794b..50aba102201a 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -9,6 +9,7 @@ TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-maps-race TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c new file mode 100644 index 000000000000..66773685a047 --- /dev/null +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022 Google LLC. + * Author: Suren Baghdasaryan <surenb@google.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Fork a child that concurrently modifies address space while the main + * process is reading /proc/$PID/maps and verifying the results. Address + * space modifications include: + * VMA splitting and merging + * + */ +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> + +/* /proc/pid/maps parsing routines */ +struct page_content { + char *data; + ssize_t size; +}; + +#define LINE_MAX_SIZE 256 + +struct line_content { + char text[LINE_MAX_SIZE]; + unsigned long start_addr; + unsigned long end_addr; +}; + +enum test_state { + INIT, + CHILD_READY, + PARENT_READY, + SETUP_READY, + SETUP_MODIFY_MAPS, + SETUP_MAPS_MODIFIED, + SETUP_RESTORE_MAPS, + SETUP_MAPS_RESTORED, + TEST_READY, + TEST_DONE, +}; + +struct vma_modifier_info; + +FIXTURE(proc_maps_race) +{ + struct vma_modifier_info *mod_info; + struct page_content page1; + struct page_content page2; + struct line_content last_line; + struct line_content first_line; + unsigned long duration_sec; + int shared_mem_size; + int page_size; + int vma_count; + bool verbose; + int maps_fd; + pid_t pid; +}; + +typedef bool (*vma_modifier_op)(FIXTURE_DATA(proc_maps_race) *self); +typedef bool (*vma_mod_result_check_op)(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line); + +struct vma_modifier_info { + int vma_count; + void *addr; + int prot; + void *next_addr; + vma_modifier_op vma_modify; + vma_modifier_op vma_restore; + vma_mod_result_check_op vma_mod_check; + pthread_mutex_t sync_lock; + pthread_cond_t sync_cond; + enum test_state curr_state; + bool exit; + void *child_mapped_addr[]; +}; + + +static bool read_two_pages(FIXTURE_DATA(proc_maps_race) *self) +{ + ssize_t bytes_read; + + if (lseek(self->maps_fd, 0, SEEK_SET) < 0) + return false; + + bytes_read = read(self->maps_fd, self->page1.data, self->page_size); + if (bytes_read <= 0) + return false; + + self->page1.size = bytes_read; + + bytes_read = read(self->maps_fd, self->page2.data, self->page_size); + if (bytes_read <= 0) + return false; + + self->page2.size = bytes_read; + + return true; +} + +static void copy_first_line(struct page_content *page, char *first_line) +{ + char *pos = strchr(page->data, '\n'); + + strncpy(first_line, page->data, pos - page->data); + first_line[pos - page->data] = '\0'; +} + +static void copy_last_line(struct page_content *page, char *last_line) +{ + /* Get the last line in the first page */ + const char *end = page->data + page->size - 1; + /* skip last newline */ + const char *pos = end - 1; + + /* search previous newline */ + while (pos[-1] != '\n') + pos--; + strncpy(last_line, pos, end - pos); + last_line[end - pos] = '\0'; +} + +/* Read the last line of the first page and the first line of the second page */ +static bool read_boundary_lines(FIXTURE_DATA(proc_maps_race) *self, + struct line_content *last_line, + struct line_content *first_line) +{ + if (!read_two_pages(self)) + return false; + + copy_last_line(&self->page1, last_line->text); + copy_first_line(&self->page2, first_line->text); + + return sscanf(last_line->text, "%lx-%lx", &last_line->start_addr, + &last_line->end_addr) == 2 && + sscanf(first_line->text, "%lx-%lx", &first_line->start_addr, + &first_line->end_addr) == 2; +} + +/* Thread synchronization routines */ +static void wait_for_state(struct vma_modifier_info *mod_info, enum test_state state) +{ + pthread_mutex_lock(&mod_info->sync_lock); + while (mod_info->curr_state != state) + pthread_cond_wait(&mod_info->sync_cond, &mod_info->sync_lock); + pthread_mutex_unlock(&mod_info->sync_lock); +} + +static void signal_state(struct vma_modifier_info *mod_info, enum test_state state) +{ + pthread_mutex_lock(&mod_info->sync_lock); + mod_info->curr_state = state; + pthread_cond_signal(&mod_info->sync_cond); + pthread_mutex_unlock(&mod_info->sync_lock); +} + +static void stop_vma_modifier(struct vma_modifier_info *mod_info) +{ + wait_for_state(mod_info, SETUP_READY); + mod_info->exit = true; + signal_state(mod_info, SETUP_MODIFY_MAPS); +} + +static void print_first_lines(char *text, int nr) +{ + const char *end = text; + + while (nr && (end = strchr(end, '\n')) != NULL) { + nr--; + end++; + } + + if (end) { + int offs = end - text; + + text[offs] = '\0'; + printf(text); + text[offs] = '\n'; + printf("\n"); + } else { + printf(text); + } +} + +static void print_last_lines(char *text, int nr) +{ + const char *start = text + strlen(text); + + nr++; /* to ignore the last newline */ + while (nr) { + while (start > text && *start != '\n') + start--; + nr--; + start--; + } + printf(start); +} + +static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) +{ + if (!self->verbose) + return; + + printf("%s", title); + /* Print 3 boundary lines from each page */ + print_last_lines(self->page1.data, 3); + printf("-----------------page boundary-----------------\n"); + print_first_lines(self->page2.data, 3); +} + +static bool print_boundaries_on(bool condition, const char *title, + FIXTURE_DATA(proc_maps_race) *self) +{ + if (self->verbose && condition) + print_boundaries(title, self); + + return condition; +} + +static void report_test_start(const char *name, bool verbose) +{ + if (verbose) + printf("==== %s ====\n", name); +} + +static struct timespec print_ts; + +static void start_test_loop(struct timespec *ts, bool verbose) +{ + if (verbose) + print_ts.tv_sec = ts->tv_sec; +} + +static void end_test_iteration(struct timespec *ts, bool verbose) +{ + if (!verbose) + return; + + /* Update every second */ + if (print_ts.tv_sec == ts->tv_sec) + return; + + printf("."); + fflush(stdout); + print_ts.tv_sec = ts->tv_sec; +} + +static void end_test_loop(bool verbose) +{ + if (verbose) + printf("\n"); +} + +static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, + struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + print_boundaries("Before modification", self); + + signal_state(self->mod_info, SETUP_MODIFY_MAPS); + wait_for_state(self->mod_info, SETUP_MAPS_MODIFIED); + + /* Copy last line of the first page and first line of the last page */ + if (!read_boundary_lines(self, mod_last_line, mod_first_line)) + return false; + + print_boundaries("After modification", self); + + signal_state(self->mod_info, SETUP_RESTORE_MAPS); + wait_for_state(self->mod_info, SETUP_MAPS_RESTORED); + + /* Copy last line of the first page and first line of the last page */ + if (!read_boundary_lines(self, restored_last_line, restored_first_line)) + return false; + + print_boundaries("After restore", self); + + if (!self->mod_info->vma_mod_check(mod_last_line, mod_first_line, + restored_last_line, restored_first_line)) + return false; + + /* + * The content of these lines after modify+resore should be the same + * as the original. + */ + return strcmp(restored_last_line->text, self->last_line.text) == 0 && + strcmp(restored_first_line->text, self->first_line.text) == 0; +} + +static inline bool split_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED; +} + +static inline bool merge_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED; +} + +static inline bool check_split_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure vmas at the boundaries are changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) != 0; +} + +static inline bool shrink_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mremap(self->mod_info->addr, self->page_size * 3, + self->page_size, 0) != MAP_FAILED; +} + +static inline bool expand_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mremap(self->mod_info->addr, self->page_size, + self->page_size * 3, 0) != MAP_FAILED; +} + +static inline bool check_shrink_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure only the last vma of the first page is changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) == 0; +} + +static inline bool remap_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + /* + * Remap the last page of the next vma into the middle of the vma. + * This splits the current vma and the first and middle parts (the + * parts at lower addresses) become the last vma objserved in the + * first page and the first vma observed in the last page. + */ + return mremap(self->mod_info->next_addr + self->page_size * 2, self->page_size, + self->page_size, MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, + self->mod_info->addr + self->page_size) != MAP_FAILED; +} + +static inline bool patch_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mprotect(self->mod_info->addr + self->page_size, self->page_size, + self->mod_info->prot) == 0; +} + +static inline bool check_remap_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure vmas at the boundaries are changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) != 0; +} + +FIXTURE_SETUP(proc_maps_race) +{ + const char *verbose = getenv("VERBOSE"); + const char *duration = getenv("DURATION"); + struct vma_modifier_info *mod_info; + pthread_mutexattr_t mutex_attr; + pthread_condattr_t cond_attr; + unsigned long duration_sec; + char fname[32]; + + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + self->verbose = verbose && !strncmp(verbose, "1", 1); + duration_sec = duration ? atol(duration) : 0; + self->duration_sec = duration_sec ? duration_sec : 5UL; + + /* + * Have to map enough vmas for /proc/pid/maps to contain more than one + * page worth of vmas. Assume at least 32 bytes per line in maps output + */ + self->vma_count = self->page_size / 32 + 1; + self->shared_mem_size = sizeof(struct vma_modifier_info) + self->vma_count * sizeof(void *); + + /* map shared memory for communication with the child process */ + self->mod_info = (struct vma_modifier_info *)mmap(NULL, self->shared_mem_size, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(self->mod_info, MAP_FAILED); + mod_info = self->mod_info; + + /* Initialize shared members */ + pthread_mutexattr_init(&mutex_attr); + pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED); + ASSERT_EQ(pthread_mutex_init(&mod_info->sync_lock, &mutex_attr), 0); + pthread_condattr_init(&cond_attr); + pthread_condattr_setpshared(&cond_attr, PTHREAD_PROCESS_SHARED); + ASSERT_EQ(pthread_cond_init(&mod_info->sync_cond, &cond_attr), 0); + mod_info->vma_count = self->vma_count; + mod_info->curr_state = INIT; + mod_info->exit = false; + + self->pid = fork(); + if (!self->pid) { + /* Child process modifying the address space */ + int prot = PROT_READ | PROT_WRITE; + int i; + + for (i = 0; i < mod_info->vma_count; i++) { + mod_info->child_mapped_addr[i] = mmap(NULL, self->page_size * 3, prot, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(mod_info->child_mapped_addr[i], MAP_FAILED); + /* change protection in adjacent maps to prevent merging */ + prot ^= PROT_WRITE; + } + signal_state(mod_info, CHILD_READY); + wait_for_state(mod_info, PARENT_READY); + while (true) { + signal_state(mod_info, SETUP_READY); + wait_for_state(mod_info, SETUP_MODIFY_MAPS); + if (mod_info->exit) + break; + + ASSERT_TRUE(mod_info->vma_modify(self)); + signal_state(mod_info, SETUP_MAPS_MODIFIED); + wait_for_state(mod_info, SETUP_RESTORE_MAPS); + ASSERT_TRUE(mod_info->vma_restore(self)); + signal_state(mod_info, SETUP_MAPS_RESTORED); + + wait_for_state(mod_info, TEST_READY); + while (mod_info->curr_state != TEST_DONE) { + ASSERT_TRUE(mod_info->vma_modify(self)); + ASSERT_TRUE(mod_info->vma_restore(self)); + } + } + for (i = 0; i < mod_info->vma_count; i++) + munmap(mod_info->child_mapped_addr[i], self->page_size * 3); + + exit(0); + } + + sprintf(fname, "/proc/%d/maps", self->pid); + self->maps_fd = open(fname, O_RDONLY); + ASSERT_NE(self->maps_fd, -1); + + /* Wait for the child to map the VMAs */ + wait_for_state(mod_info, CHILD_READY); + + /* Read first two pages */ + self->page1.data = malloc(self->page_size); + ASSERT_NE(self->page1.data, NULL); + self->page2.data = malloc(self->page_size); + ASSERT_NE(self->page2.data, NULL); + + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + /* + * Find the addresses corresponding to the last line in the first page + * and the first line in the last page. + */ + mod_info->addr = NULL; + mod_info->next_addr = NULL; + for (int i = 0; i < mod_info->vma_count; i++) { + if (mod_info->child_mapped_addr[i] == (void *)self->last_line.start_addr) { + mod_info->addr = mod_info->child_mapped_addr[i]; + mod_info->prot = PROT_READ; + /* Even VMAs have write permission */ + if ((i % 2) == 0) + mod_info->prot |= PROT_WRITE; + } else if (mod_info->child_mapped_addr[i] == (void *)self->first_line.start_addr) { + mod_info->next_addr = mod_info->child_mapped_addr[i]; + } + + if (mod_info->addr && mod_info->next_addr) + break; + } + ASSERT_TRUE(mod_info->addr && mod_info->next_addr); + + signal_state(mod_info, PARENT_READY); + +} + +FIXTURE_TEARDOWN(proc_maps_race) +{ + int status; + + stop_vma_modifier(self->mod_info); + + free(self->page2.data); + free(self->page1.data); + + for (int i = 0; i < self->vma_count; i++) + munmap(self->mod_info->child_mapped_addr[i], self->page_size); + close(self->maps_fd); + waitpid(self->pid, &status, 0); + munmap(self->mod_info, self->shared_mem_size); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_split) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content split_last_line; + struct line_content split_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = split_vma; + mod_info->vma_restore = merge_vma; + mod_info->vma_mod_check = check_split_result; + + report_test_start("Tearing from split", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &split_last_line, &split_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + bool last_line_changed; + bool first_line_changed; + + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after split */ + if (!strcmp(new_last_line.text, split_last_line.text)) { + /* + * The vmas should be consistent with split results, + * however if vma was concurrently restored after a + * split, it can be reported twice (first the original + * split one, then the same vma but extended after the + * merge) because we found it as the next vma again. + * In that case new first line will be the same as the + * last restored line. + */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, split_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Split result invalid", self)); + } else { + /* The vmas should be consistent with merge results */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Merge result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Merge result invalid", self)); + } + /* + * First and last lines should change in unison. If the last + * line changed then the first line should change as well and + * vice versa. + */ + last_line_changed = strcmp(new_last_line.text, self->last_line.text) != 0; + first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0; + ASSERT_EQ(last_line_changed, first_line_changed); + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_resize) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content shrunk_last_line; + struct line_content shrunk_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = shrink_vma; + mod_info->vma_restore = expand_vma; + mod_info->vma_mod_check = check_shrink_result; + + report_test_start("Tearing from resize", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &shrunk_last_line, &shrunk_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after shrinking it */ + if (!strcmp(new_last_line.text, shrunk_last_line.text)) { + /* + * The vmas should be consistent with shrunk results, + * however if the vma was concurrently restored, it + * can be reported twice (first as shrunk one, then + * as restored one) because we found it as the next vma + * again. In that case new first line will be the same + * as the last restored line. + */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, shrunk_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Shrink result invalid", self)); + } else { + /* The vmas should be consistent with the original/resored state */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Expand result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Expand result invalid", self)); + } + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_remap) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content remapped_last_line; + struct line_content remapped_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = remap_vma; + mod_info->vma_restore = patch_vma; + mod_info->vma_mod_check = check_remap_result; + + report_test_start("Tearing from remap", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &remapped_last_line, &remapped_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after remapping it */ + if (!strcmp(new_last_line.text, remapped_last_line.text)) { + /* + * The vmas should be consistent with remap results, + * however if the vma was concurrently restored, it + * can be reported twice (first as split one, then + * as restored one) because we found it as the next vma + * again. In that case new first line will be the same + * as the last restored line. + */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, remapped_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Remap result invalid", self)); + } else { + /* The vmas should be consistent with the original/resored state */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Remap restore result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Remap restore result invalid", self)); + } + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 430f5e13bf55..01cf4f3da4e0 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -123,6 +123,10 @@ void BPF_STRUCT_OPS(maximal_cgroup_cancel_move, struct task_struct *p, void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) {} +void BPF_STRUCT_OPS(maximal_cgroup_set_bandwidth, struct cgroup *cgrp, + u64 period_us, u64 quota_us, u64 burst_us) +{} + s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) { return scx_bpf_create_dsq(DSQ_ID, -1); @@ -160,6 +164,7 @@ struct sched_ext_ops maximal_ops = { .cgroup_move = (void *) maximal_cgroup_move, .cgroup_cancel_move = (void *) maximal_cgroup_cancel_move, .cgroup_set_weight = (void *) maximal_cgroup_set_weight, + .cgroup_set_bandwidth = (void *) maximal_cgroup_set_bandwidth, .init = (void *) maximal_init, .exit = (void *) maximal_exit, .name = "maximal", |