Diffstat (limited to 'tools/testing/selftests/resctrl')

 -rw-r--r--  tools/testing/selftests/resctrl/Makefile         |   7
 -rw-r--r--  tools/testing/selftests/resctrl/cache.c          |  10
 -rw-r--r--  tools/testing/selftests/resctrl/cat_test.c       |  48
 -rw-r--r--  tools/testing/selftests/resctrl/cmt_test.c       |  71
 -rw-r--r--  tools/testing/selftests/resctrl/fill_buf.c       |  45
 -rw-r--r--  tools/testing/selftests/resctrl/mba_test.c       |  88
 -rw-r--r--  tools/testing/selftests/resctrl/mbm_test.c       |  73
 -rw-r--r--  tools/testing/selftests/resctrl/resctrl.h        | 115
 -rw-r--r--  tools/testing/selftests/resctrl/resctrl_tests.c  | 130
 -rw-r--r--  tools/testing/selftests/resctrl/resctrl_val.c    | 684
 -rw-r--r--  tools/testing/selftests/resctrl/resctrlfs.c      | 209

 11 files changed, 770 insertions(+), 710 deletions(-)
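As a reading aid for the diff below: several tests (cmt_run_test(), mba_run_test(), mbm_run_test()) now choose between user-supplied "fill_buf" parameters, a user benchmark command, and per-test defaults before calling resctrl_val(). The following is a minimal sketch of that selection pattern, not part of the patch; the helper name choose_fill_buf() and the default_size parameter are hypothetical, while the structures and field names mirror the patch.

```c
/* Hypothetical helper showing the fill_buf selection pattern used by the tests. */
static void choose_fill_buf(const struct user_params *uparams,
			    struct resctrl_val_param *param,
			    struct fill_buf_param *fill_buf,
			    size_t default_size)
{
	if (uparams->fill_buf) {
		/* User ran "-b fill_buf <buf_size> <memflush>": honor those values. */
		fill_buf->buf_size = uparams->fill_buf->buf_size;
		fill_buf->memflush = uparams->fill_buf->memflush;
		param->fill_buf = fill_buf;
	} else if (!uparams->benchmark_cmd[0]) {
		/* No benchmark given at all: fall back to test-specific defaults. */
		fill_buf->buf_size = default_size;
		fill_buf->memflush = true;
		param->fill_buf = fill_buf;
	}
	/* Otherwise a user-provided benchmark runs and param->fill_buf stays NULL. */
}
```

In the patch itself, the CMT test derives the default size from the cache portion under test, while the MBA/MBM tests use get_fill_buf_size(); the sketch above only abstracts that difference behind default_size.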
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 2deac2031de9..984534cfbf1b 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -1,10 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE +CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := resctrl_tests +LOCAL_HDRS += $(wildcard *.h) + include ../lib.mk +CFLAGS += -I$(top_srcdir)/tools/include -$(OUTPUT)/resctrl_tests: $(wildcard *.[ch]) +$(OUTPUT)/resctrl_tests: $(wildcard *.c) diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index 1b339d6bbff1..1ff1104e6575 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -101,12 +101,12 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy) * * Return: 0 on success, < 0 on error. */ -static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value) +static int print_results_cache(const char *filename, pid_t bm_pid, __u64 llc_value) { FILE *fp; if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { - printf("Pid: %d \t LLC_value: %llu\n", bm_pid, llc_value); + printf("Pid: %d \t LLC_value: %llu\n", (int)bm_pid, llc_value); } else { fp = fopen(filename, "a"); if (!fp) { @@ -114,7 +114,7 @@ static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value return -1; } - fprintf(fp, "Pid: %d \t llc_value: %llu\n", bm_pid, llc_value); + fprintf(fp, "Pid: %d \t llc_value: %llu\n", (int)bm_pid, llc_value); fclose(fp); } @@ -133,7 +133,7 @@ static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value * Return: =0 on success. <0 on failure. */ int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, - const char *filename, int bm_pid) + const char *filename, pid_t bm_pid) { int ret; @@ -161,7 +161,7 @@ int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, * * Return: =0 on success. <0 on failure. 
*/ -int measure_llc_resctrl(const char *filename, int bm_pid) +int measure_llc_resctrl(const char *filename, pid_t bm_pid) { unsigned long llc_occu_resc = 0; int ret; diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 4cb991be8e31..94cfdba5308d 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -128,7 +128,7 @@ static int check_results(struct resctrl_val_param *param, const char *cache_type return fail; } -void cat_test_cleanup(void) +static void cat_test_cleanup(void) { remove(RESULT_FILE_NAME); } @@ -158,7 +158,6 @@ static int cat_test(const struct resctrl_test *test, struct resctrl_val_param *param, size_t span, unsigned long current_mask) { - char *resctrl_val = param->resctrl_val; struct perf_event_read pe_read; struct perf_event_attr pea; cpu_set_t old_affinity; @@ -178,8 +177,7 @@ static int cat_test(const struct resctrl_test *test, return ret; /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ - ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, - resctrl_val); + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); if (ret) goto reset_affinity; @@ -272,7 +270,6 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param start_mask = create_bit_mask(start, n); struct resctrl_val_param param = { - .resctrl_val = CAT_STR, .ctrlgrp = "c1", .filename = RESULT_FILE_NAME, .num_of_runs = 0, @@ -284,21 +281,40 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param ret = cat_test(test, uparams, ¶m, span, start_mask); if (ret) - goto out; + return ret; ret = check_results(¶m, test->resource, cache_total_size, full_cache_mask, start_mask); -out: - cat_test_cleanup(); - return ret; } +static bool arch_supports_noncont_cat(const struct resctrl_test *test) +{ + /* AMD always supports non-contiguous CBM. */ + if (get_vendor() == ARCH_AMD) + return true; + +#if defined(__i386__) || defined(__x86_64__) /* arch */ + unsigned int eax, ebx, ecx, edx; + /* Intel support for non-contiguous CBM needs to be discovered. 
*/ + if (!strcmp(test->resource, "L3")) + __cpuid_count(0x10, 1, eax, ebx, ecx, edx); + else if (!strcmp(test->resource, "L2")) + __cpuid_count(0x10, 2, eax, ebx, ecx, edx); + else + return false; + + return ((ecx >> 3) & 1); +#endif /* end arch */ + + return false; +} + static int noncont_cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) { unsigned long full_cache_mask, cont_mask, noncont_mask; - unsigned int eax, ebx, ecx, edx, sparse_masks; + unsigned int sparse_masks; int bit_center, ret; char schemata[64]; @@ -307,15 +323,8 @@ static int noncont_cat_run_test(const struct resctrl_test *test, if (ret) return ret; - if (!strcmp(test->resource, "L3")) - __cpuid_count(0x10, 1, eax, ebx, ecx, edx); - else if (!strcmp(test->resource, "L2")) - __cpuid_count(0x10, 2, eax, ebx, ecx, edx); - else - return -EINVAL; - - if (sparse_masks != ((ecx >> 3) & 1)) { - ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n"); + if (arch_supports_noncont_cat(test) != sparse_masks) { + ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n"); return 1; } @@ -373,6 +382,7 @@ struct resctrl_test l3_cat_test = { .resource = "L3", .feature_check = test_resource_feature_check, .run_test = cat_run_test, + .cleanup = cat_test_cleanup, }; struct resctrl_test l3_noncont_cat_test = { diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index a81f91222a89..d09e693dc739 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -16,6 +16,17 @@ #define MAX_DIFF 2000000 #define MAX_DIFF_PERCENT 15 +#define CON_MON_LCC_OCCUP_PATH \ + "%s/%s/mon_data/mon_L3_%02d/llc_occupancy" + +static int cmt_init(const struct resctrl_val_param *param, int domain_id) +{ + sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, + param->ctrlgrp, domain_id); + + return 0; +} + static int cmt_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) @@ -29,6 +40,13 @@ static int cmt_setup(const struct resctrl_test *test, return 0; } +static int cmt_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + sleep(1); + return measure_llc_resctrl(param->filename, bm_pid); +} + static int show_results_info(unsigned long sum_llc_val, int no_of_bits, unsigned long cache_span, unsigned long max_diff, unsigned long max_diff_percent, unsigned long num_of_runs, @@ -40,11 +58,11 @@ static int show_results_info(unsigned long sum_llc_val, int no_of_bits, int ret; avg_llc_val = sum_llc_val / num_of_runs; - avg_diff = (long)abs(cache_span - avg_llc_val); + avg_diff = (long)(cache_span - avg_llc_val); diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100; ret = platform && abs((int)diff_percent) > max_diff_percent && - abs(avg_diff) > max_diff; + labs(avg_diff) > max_diff; ksft_print_msg("%s Check cache miss rate within %lu%%\n", ret ? 
"Fail:" : "Pass:", max_diff_percent); @@ -81,32 +99,29 @@ static int check_results(struct resctrl_val_param *param, size_t span, int no_of } /* Field 3 is llc occ resc value */ - if (runs > 0) - sum_llc_occu_resc += strtoul(token_array[3], NULL, 0); + sum_llc_occu_resc += strtoul(token_array[3], NULL, 0); runs++; } fclose(fp); return show_results_info(sum_llc_occu_resc, no_of_bits, span, - MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true); + MAX_DIFF, MAX_DIFF_PERCENT, runs, true); } -void cmt_test_cleanup(void) +static void cmt_test_cleanup(void) { remove(RESULT_FILE_NAME); } static int cmt_run_test(const struct resctrl_test *test, const struct user_params *uparams) { - const char * const *cmd = uparams->benchmark_cmd; - const char *new_cmd[BENCHMARK_ARGS]; + struct fill_buf_param fill_buf = {}; unsigned long cache_total_size = 0; int n = uparams->bits ? : 5; unsigned long long_mask; - char *span_str = NULL; int count_of_bits; size_t span; - int ret, i; + int ret; ret = get_full_cbm("L3", &long_mask); if (ret) @@ -126,43 +141,36 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param } struct resctrl_val_param param = { - .resctrl_val = CMT_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, .mask = ~(long_mask << n) & long_mask, .num_of_runs = 0, + .init = cmt_init, .setup = cmt_setup, + .measure = cmt_measure, }; span = cache_portion_size(cache_total_size, param.mask, long_mask); - if (strcmp(cmd[0], "fill_buf") == 0) { - /* Duplicate the command to be able to replace span in it */ - for (i = 0; uparams->benchmark_cmd[i]; i++) - new_cmd[i] = uparams->benchmark_cmd[i]; - new_cmd[i] = NULL; - - ret = asprintf(&span_str, "%zu", span); - if (ret < 0) - return -1; - new_cmd[1] = span_str; - cmd = new_cmd; + if (uparams->fill_buf) { + fill_buf.buf_size = span; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + fill_buf.buf_size = span; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; } remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, cmd, ¶m); + ret = resctrl_val(test, uparams, ¶m); if (ret) - goto out; + return ret; ret = check_results(¶m, span, n); - if (ret && (get_vendor() == ARCH_INTEL)) - ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. 
Check BIOS configuration.\n"); - -out: - cmt_test_cleanup(); - free(span_str); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); return ret; } @@ -178,4 +186,5 @@ struct resctrl_test cmt_test = { .resource = "L3", .feature_check = cmt_feature_check, .run_test = cmt_run_test, + .cleanup = cmt_test_cleanup, }; diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index ae120f1735c0..19a01a52dc1a 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -88,18 +88,6 @@ static int fill_one_span_read(unsigned char *buf, size_t buf_size) return sum; } -static void fill_one_span_write(unsigned char *buf, size_t buf_size) -{ - unsigned char *end_ptr = buf + buf_size; - unsigned char *p; - - p = buf; - while (p < end_ptr) { - *p = '1'; - p += (CL_SIZE / 2); - } -} - void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) { int ret = 0; @@ -114,20 +102,11 @@ void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) *value_sink = ret; } -static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once) -{ - while (1) { - fill_one_span_write(buf, buf_size); - if (once) - break; - } -} - -unsigned char *alloc_buffer(size_t buf_size, int memflush) +unsigned char *alloc_buffer(size_t buf_size, bool memflush) { void *buf = NULL; uint64_t *p64; - size_t s64; + ssize_t s64; int ret; ret = posix_memalign(&buf, PAGE_SIZE, buf_size); @@ -151,19 +130,15 @@ unsigned char *alloc_buffer(size_t buf_size, int memflush) return buf; } -int run_fill_buf(size_t buf_size, int memflush, int op, bool once) +ssize_t get_fill_buf_size(int cpu_no, const char *cache_type) { - unsigned char *buf; - - buf = alloc_buffer(buf_size, memflush); - if (!buf) - return -1; + unsigned long cache_total_size = 0; + int ret; - if (op == 0) - fill_cache_read(buf, buf_size, once); - else - fill_cache_write(buf, buf_size, once); - free(buf); + ret = get_cache_size(cpu_no, cache_type, &cache_total_size); + if (ret) + return ret; - return 0; + return cache_total_size * 2 > MINIMUM_SPAN ? + cache_total_size * 2 : MINIMUM_SPAN; } diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index 7946e32e85c8..c7e9adc0368f 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -17,6 +17,19 @@ #define ALLOCATION_MIN 10 #define ALLOCATION_STEP 10 +static int mba_init(const struct resctrl_val_param *param, int domain_id) +{ + int ret; + + ret = initialize_read_mem_bw_imc(); + if (ret) + return ret; + + initialize_mem_bw_resctrl(param, domain_id); + + return 0; +} + /* * Change schemata percentage from 100 to 10%. Write schemata to specified * con_mon grp, mon_grp in resctrl FS. 
@@ -26,7 +39,8 @@ static int mba_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) { - static int runs_per_allocation, allocation = 100; + static unsigned int allocation = ALLOCATION_MIN; + static int runs_per_allocation; char allocation_str[64]; int ret; @@ -37,7 +51,7 @@ static int mba_setup(const struct resctrl_test *test, if (runs_per_allocation++ != 0) return 0; - if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX) + if (allocation > ALLOCATION_MAX) return END_OF_TESTS; sprintf(allocation_str, "%d", allocation); @@ -46,37 +60,47 @@ static int mba_setup(const struct resctrl_test *test, if (ret < 0) return ret; - allocation -= ALLOCATION_STEP; + allocation += ALLOCATION_STEP; return 0; } +static int mba_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + return measure_read_mem_bw(uparams, param, bm_pid); +} + static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) { - int allocation, runs; + unsigned int allocation; bool ret = false; + int runs; ksft_print_msg("Results are displayed in (MB)\n"); /* Memory bandwidth from 100% down to 10% */ for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP; allocation++) { - unsigned long avg_bw_imc, avg_bw_resc; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; + long avg_bw_imc, avg_bw_resc; int avg_diff_per; float avg_diff; - /* - * The first run is discarded due to inaccurate value from - * phase transition. - */ - for (runs = NUM_OF_RUNS * allocation + 1; + for (runs = NUM_OF_RUNS * allocation; runs < NUM_OF_RUNS * allocation + NUM_OF_RUNS ; runs++) { sum_bw_imc += bw_imc[runs]; sum_bw_resc += bw_resc[runs]; } - avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1); - avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1); + avg_bw_imc = sum_bw_imc / NUM_OF_RUNS; + avg_bw_resc = sum_bw_resc / NUM_OF_RUNS; + if (avg_bw_imc < THROTTLE_THRESHOLD || avg_bw_resc < THROTTLE_THRESHOLD) { + ksft_print_msg("Bandwidth below threshold (%d MiB). Dropping results from MBA schemata %u.\n", + THROTTLE_THRESHOLD, + ALLOCATION_MIN + ALLOCATION_STEP * allocation); + continue; + } + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; avg_diff_per = (int)(avg_diff * 100); @@ -84,7 +108,7 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) avg_diff_per > MAX_DIFF_PERCENT ? 
"Fail:" : "Pass:", MAX_DIFF_PERCENT, - ALLOCATION_MAX - ALLOCATION_STEP * allocation); + ALLOCATION_MIN + ALLOCATION_STEP * allocation); ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); @@ -103,8 +127,9 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) static int check_results(void) { + unsigned long bw_resc[NUM_OF_RUNS * ALLOCATION_MAX / ALLOCATION_STEP]; + unsigned long bw_imc[NUM_OF_RUNS * ALLOCATION_MAX / ALLOCATION_STEP]; char *token_array[8], output[] = RESULT_FILE_NAME, temp[512]; - unsigned long bw_imc[1024], bw_resc[1024]; int runs; FILE *fp; @@ -137,7 +162,7 @@ static int check_results(void) return show_mba_info(bw_imc, bw_resc); } -void mba_test_cleanup(void) +static void mba_test_cleanup(void) { remove(RESULT_FILE_NAME); } @@ -145,25 +170,39 @@ void mba_test_cleanup(void) static int mba_run_test(const struct resctrl_test *test, const struct user_params *uparams) { struct resctrl_val_param param = { - .resctrl_val = MBA_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, - .bw_report = "reads", - .setup = mba_setup + .init = mba_init, + .setup = mba_setup, + .measure = mba_measure, }; + struct fill_buf_param fill_buf = {}; int ret; remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, uparams->benchmark_cmd, ¶m); + if (uparams->fill_buf) { + fill_buf.buf_size = uparams->fill_buf->buf_size; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + ssize_t buf_size; + + buf_size = get_fill_buf_size(uparams->cpu, "L3"); + if (buf_size < 0) + return buf_size; + fill_buf.buf_size = buf_size; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } + + ret = resctrl_val(test, uparams, ¶m); if (ret) - goto out; + return ret; ret = check_results(); - -out: - mba_test_cleanup(); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); return ret; } @@ -180,4 +219,5 @@ struct resctrl_test mba_test = { .vendor_specific = ARCH_INTEL, .feature_check = mba_feature_check, .run_test = mba_run_test, + .cleanup = mba_test_cleanup, }; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index d67ffa3ec63a..84d8bc250539 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -17,22 +17,18 @@ static int show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) { - unsigned long avg_bw_imc = 0, avg_bw_resc = 0; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; + long avg_bw_imc = 0, avg_bw_resc = 0; int runs, ret, avg_diff_per; float avg_diff = 0; - /* - * Discard the first value which is inaccurate due to monitoring setup - * transition phase. - */ - for (runs = 1; runs < NUM_OF_RUNS ; runs++) { + for (runs = 0; runs < NUM_OF_RUNS; runs++) { sum_bw_imc += bw_imc[runs]; sum_bw_resc += bw_resc[runs]; } - avg_bw_imc = sum_bw_imc / 4; - avg_bw_resc = sum_bw_resc / 4; + avg_bw_imc = sum_bw_imc / NUM_OF_RUNS; + avg_bw_resc = sum_bw_resc / NUM_OF_RUNS; avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; avg_diff_per = (int)(avg_diff * 100); @@ -40,7 +36,8 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) ksft_print_msg("%s Check MBM diff within %d%%\n", ret ? 
"Fail:" : "Pass:", MAX_DIFF_PERCENT); ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); - ksft_print_msg("Span (MB): %zu\n", span / MB); + if (span) + ksft_print_msg("Span (MB): %zu\n", span / MB); ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); @@ -86,6 +83,19 @@ static int check_results(size_t span) return ret; } +static int mbm_init(const struct resctrl_val_param *param, int domain_id) +{ + int ret; + + ret = initialize_read_mem_bw_imc(); + if (ret) + return ret; + + initialize_mem_bw_resctrl(param, domain_id); + + return 0; +} + static int mbm_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) @@ -105,7 +115,13 @@ static int mbm_setup(const struct resctrl_test *test, return ret; } -void mbm_test_cleanup(void) +static int mbm_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + return measure_read_mem_bw(uparams, param, bm_pid); +} + +static void mbm_test_cleanup(void) { remove(RESULT_FILE_NAME); } @@ -113,27 +129,39 @@ void mbm_test_cleanup(void) static int mbm_run_test(const struct resctrl_test *test, const struct user_params *uparams) { struct resctrl_val_param param = { - .resctrl_val = MBM_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, - .bw_report = "reads", - .setup = mbm_setup + .init = mbm_init, + .setup = mbm_setup, + .measure = mbm_measure, }; + struct fill_buf_param fill_buf = {}; int ret; remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, uparams->benchmark_cmd, ¶m); - if (ret) - goto out; + if (uparams->fill_buf) { + fill_buf.buf_size = uparams->fill_buf->buf_size; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + ssize_t buf_size; + + buf_size = get_fill_buf_size(uparams->cpu, "L3"); + if (buf_size < 0) + return buf_size; + fill_buf.buf_size = buf_size; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } - ret = check_results(DEFAULT_SPAN); - if (ret && (get_vendor() == ARCH_INTEL)) - ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); + ret = resctrl_val(test, uparams, ¶m); + if (ret) + return ret; -out: - mbm_test_cleanup(); + ret = check_results(param.fill_buf ? 
param.fill_buf->buf_size : 0); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); return ret; } @@ -150,4 +178,5 @@ struct resctrl_test mbm_test = { .vendor_specific = ARCH_INTEL, .feature_check = mbm_feature_check, .run_test = mbm_run_test, + .cleanup = mbm_test_cleanup, }; diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 2051bd135e0d..cd3adfc14969 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -11,6 +11,7 @@ #include <signal.h> #include <dirent.h> #include <stdbool.h> +#include <ctype.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/mount.h> @@ -21,6 +22,7 @@ #include <sys/eventfd.h> #include <asm/unistd.h> #include <linux/perf_event.h> +#include <linux/compiler.h> #include "../kselftest.h" #define MB (1024 * 1024) @@ -41,25 +43,48 @@ #define BENCHMARK_ARGS 64 -#define DEFAULT_SPAN (250 * MB) +#define MINIMUM_SPAN (250 * MB) -#define PARENT_EXIT() \ - do { \ - kill(ppid, SIGKILL); \ - umount_resctrlfs(); \ - exit(EXIT_FAILURE); \ - } while (0) +/* + * Memory bandwidth (in MiB) below which the bandwidth comparisons + * between iMC and resctrl are considered unreliable. For example RAS + * features or memory performance features that generate memory traffic + * may drive accesses that are counted differently by performance counters + * and MBM respectively, for instance generating "overhead" traffic which + * is not counted against any specific RMID. + */ +#define THROTTLE_THRESHOLD 750 + +/* + * fill_buf_param: "fill_buf" benchmark parameters + * @buf_size: Size (in bytes) of buffer used in benchmark. + * "fill_buf" allocates and initializes buffer of + * @buf_size. User can change value via command line. + * @memflush: If false the buffer will not be flushed after + * allocation and initialization, otherwise the + * buffer will be flushed. User can change value via + * command line (via integers with 0 interpreted as + * false and anything else as true). + */ +struct fill_buf_param { + size_t buf_size; + bool memflush; +}; /* * user_params: User supplied parameters * @cpu: CPU number to which the benchmark will be bound to * @bits: Number of bits used for cache allocation size * @benchmark_cmd: Benchmark command to run during (some of the) tests + * @fill_buf: Pointer to user provided parameters for "fill_buf", + * NULL if user did not provide parameters and test + * specific defaults should be used. */ struct user_params { int cpu; int bits; const char *benchmark_cmd[BENCHMARK_ARGS]; + const struct fill_buf_param *fill_buf; }; /* @@ -72,6 +97,7 @@ struct user_params { * @disabled: Test is disabled * @feature_check: Callback to check required resctrl features * @run_test: Callback to run the test + * @cleanup: Callback to cleanup after the test */ struct resctrl_test { const char *name; @@ -82,28 +108,40 @@ struct resctrl_test { bool (*feature_check)(const struct resctrl_test *test); int (*run_test)(const struct resctrl_test *test, const struct user_params *uparams); + void (*cleanup)(void); }; /* * resctrl_val_param: resctrl test parameters - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. 
etc) * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) * @filename: Name of file to which the o/p should be written - * @bw_report: Bandwidth report type (reads vs writes) - * @setup: Call back function to setup test environment + * @init: Callback function to initialize test environment + * @setup: Callback function to setup per test run environment + * @measure: Callback that performs the measurement (a single test) + * @fill_buf: Parameters for default "fill_buf" benchmark. + * Initialized with user provided parameters, possibly + * adapted to be relevant to the test. If user does + * not provide parameters for "fill_buf" nor a + * replacement benchmark then initialized with defaults + * appropriate for test. NULL if user provided + * benchmark. */ struct resctrl_val_param { - char *resctrl_val; - char ctrlgrp[64]; - char mongrp[64]; - char filename[64]; - char *bw_report; - unsigned long mask; - int num_of_runs; - int (*setup)(const struct resctrl_test *test, - const struct user_params *uparams, - struct resctrl_val_param *param); + const char *ctrlgrp; + const char *mongrp; + char filename[64]; + unsigned long mask; + int num_of_runs; + int (*init)(const struct resctrl_val_param *param, + int domain_id); + int (*setup)(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *param); + int (*measure)(const struct user_params *uparams, + struct resctrl_val_param *param, + pid_t bm_pid); + struct fill_buf_param *fill_buf; }; struct perf_event_read { @@ -113,11 +151,6 @@ struct perf_event_read { } values[2]; }; -#define MBM_STR "mbm" -#define MBA_STR "mba" -#define CMT_STR "cmt" -#define CAT_STR "cat" - /* * Memory location that consumes values compiler must not optimize away. 
* Volatile ensures writes to this location cannot be optimized away by @@ -125,17 +158,17 @@ struct perf_event_read { */ extern volatile int *value_sink; -extern pid_t bm_pid, ppid; +extern int snc_unreliable; extern char llc_occup_path[1024]; +int snc_nodes_per_l3_cache(void); int get_vendor(void); bool check_resctrlfs_support(void); int filter_dmesg(void); int get_domain_id(const char *resource, int cpu_no, int *domain_id); int mount_resctrlfs(void); int umount_resctrlfs(void); -int validate_bw_report_request(char *bw_report); bool resctrl_resource_exists(const char *resource); bool resctrl_mon_feature_exists(const char *resource, const char *feature); bool resource_info_file_exists(const char *resource, const char *file); @@ -143,22 +176,23 @@ bool test_resource_feature_check(const struct resctrl_test *test); char *fgrep(FILE *inf, const char *str); int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity); int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity); -int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource); -int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, - char *resctrl_val); +int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, + const char *resource); +int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp); int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); -unsigned char *alloc_buffer(size_t buf_size, int memflush); +unsigned char *alloc_buffer(size_t buf_size, bool memflush); void mem_flush(unsigned char *buf, size_t buf_size); void fill_cache_read(unsigned char *buf, size_t buf_size, bool once); -int run_fill_buf(size_t buf_size, int memflush, int op, bool once); +ssize_t get_fill_buf_size(int cpu_no, const char *cache_type); +int initialize_read_mem_bw_imc(void); +int measure_read_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid); +void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, + int domain_id); int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, - const char * const *benchmark_cmd, struct resctrl_val_param *param); -void tests_cleanup(void); -void mbm_test_cleanup(void); -void mba_test_cleanup(void); unsigned long create_bit_mask(unsigned int start, unsigned int len); unsigned int count_contiguous_bits(unsigned long val, unsigned int *start); int get_full_cbm(const char *cache_type, unsigned long *mask); @@ -166,19 +200,18 @@ int get_mask_no_shareable(const char *cache_type, unsigned long *mask); int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size); int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val); void ctrlc_handler(int signum, siginfo_t *info, void *ptr); -int signal_handler_register(void); +int signal_handler_register(const struct resctrl_test *test); void signal_handler_unregister(void); -void cat_test_cleanup(void); unsigned int count_bits(unsigned long n); -void cmt_test_cleanup(void); +int snc_kernel_support(void); void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config); void perf_event_initialize_read_format(struct perf_event_read *pe_read); int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no); int perf_event_reset_enable(int pe_fd); int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, - const char *filename, int bm_pid); -int measure_llc_resctrl(const char *filename, int 
bm_pid); + const char *filename, pid_t bm_pid); +int measure_llc_resctrl(const char *filename, pid_t bm_pid); void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines); /* diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index f3dc1b9696e7..5154ffd821c4 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -81,19 +81,11 @@ static void cmd_help(void) printf("\t-h: help\n"); } -void tests_cleanup(void) -{ - mbm_test_cleanup(); - mba_test_cleanup(); - cmt_test_cleanup(); - cat_test_cleanup(); -} - -static int test_prepare(void) +static int test_prepare(const struct resctrl_test *test) { int res; - res = signal_handler_register(); + res = signal_handler_register(test); if (res) { ksft_print_msg("Failed to register signal handler\n"); return res; @@ -108,8 +100,10 @@ static int test_prepare(void) return 0; } -static void test_cleanup(void) +static void test_cleanup(const struct resctrl_test *test) { + if (test->cleanup) + test->cleanup(); umount_resctrlfs(); signal_handler_unregister(); } @@ -124,7 +118,7 @@ static bool test_vendor_specific_check(const struct resctrl_test *test) static void run_single_test(const struct resctrl_test *test, const struct user_params *uparams) { - int ret; + int ret, snc_mode; if (test->disabled) return; @@ -134,9 +128,16 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p return; } + snc_mode = snc_nodes_per_l3_cache(); + ksft_print_msg("Starting %s test ...\n", test->name); - if (test_prepare()) { + if (snc_mode == 1 && snc_unreliable && get_vendor() == ARCH_INTEL) { + ksft_test_result_skip("SNC detection unreliable due to offline CPUs. Test results may not be accurate if SNC enabled.\n"); + return; + } + + if (test_prepare(test)) { ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } @@ -151,7 +152,79 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p ksft_test_result(!ret, "%s: test\n", test->name); cleanup: - test_cleanup(); + test_cleanup(test); +} + +/* + * Allocate and initialize a struct fill_buf_param with user provided + * (via "-b fill_buf <fill_buf parameters>") parameters. + * + * Use defaults (that may not be appropriate for all tests) for any + * fill_buf parameters omitted by the user. + * + * Historically it may have been possible for user space to provide + * additional parameters, "operation" ("read" vs "write") in + * benchmark_cmd[3] and "once" (run "once" or until terminated) in + * benchmark_cmd[4]. Changing these parameters have never been + * supported with the default of "read" operation and running until + * terminated built into the tests. Any unsupported values for + * (original) "fill_buf" parameters are treated as failure. + * + * Return: On failure, forcibly exits the test on any parsing failure, + * returns NULL if no parsing needed (user did not actually provide + * "-b fill_buf"). + * On success, returns pointer to newly allocated and fully + * initialized struct fill_buf_param that caller must free. 
+ */ +static struct fill_buf_param *alloc_fill_buf_param(struct user_params *uparams) +{ + struct fill_buf_param *fill_param = NULL; + char *endptr = NULL; + + if (!uparams->benchmark_cmd[0] || strcmp(uparams->benchmark_cmd[0], "fill_buf")) + return NULL; + + fill_param = malloc(sizeof(*fill_param)); + if (!fill_param) + ksft_exit_skip("Unable to allocate memory for fill_buf parameters.\n"); + + if (uparams->benchmark_cmd[1] && *uparams->benchmark_cmd[1] != '\0') { + errno = 0; + fill_param->buf_size = strtoul(uparams->benchmark_cmd[1], &endptr, 10); + if (errno || *endptr != '\0') { + free(fill_param); + ksft_exit_skip("Unable to parse benchmark buffer size.\n"); + } + } else { + fill_param->buf_size = MINIMUM_SPAN; + } + + if (uparams->benchmark_cmd[2] && *uparams->benchmark_cmd[2] != '\0') { + errno = 0; + fill_param->memflush = strtol(uparams->benchmark_cmd[2], &endptr, 10) != 0; + if (errno || *endptr != '\0') { + free(fill_param); + ksft_exit_skip("Unable to parse benchmark memflush parameter.\n"); + } + } else { + fill_param->memflush = true; + } + + if (uparams->benchmark_cmd[3] && *uparams->benchmark_cmd[3] != '\0') { + if (strcmp(uparams->benchmark_cmd[3], "0")) { + free(fill_param); + ksft_exit_skip("Only read operations supported.\n"); + } + } + + if (uparams->benchmark_cmd[4] && *uparams->benchmark_cmd[4] != '\0') { + if (strcmp(uparams->benchmark_cmd[4], "false")) { + free(fill_param); + ksft_exit_skip("fill_buf is required to run until termination.\n"); + } + } + + return fill_param; } static void init_user_params(struct user_params *uparams) @@ -164,11 +237,11 @@ static void init_user_params(struct user_params *uparams) int main(int argc, char **argv) { + struct fill_buf_param *fill_param = NULL; int tests = ARRAY_SIZE(resctrl_tests); bool test_param_seen = false; struct user_params uparams; - char *span_str = NULL; - int ret, c, i; + int c, i; init_user_params(&uparams); @@ -245,6 +318,10 @@ int main(int argc, char **argv) } last_arg: + fill_param = alloc_fill_buf_param(&uparams); + if (fill_param) + uparams.fill_buf = fill_param; + ksft_print_header(); /* @@ -253,34 +330,21 @@ last_arg: * 2. We execute perf commands */ if (geteuid() != 0) - return ksft_exit_skip("Not running as root. Skipping...\n"); + ksft_exit_skip("Not running as root. Skipping...\n"); if (!check_resctrlfs_support()) - return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); + ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); if (umount_resctrlfs()) - return ksft_exit_skip("resctrl FS unmount failed.\n"); + ksft_exit_skip("resctrl FS unmount failed.\n"); filter_dmesg(); - if (!uparams.benchmark_cmd[0]) { - /* If no benchmark is given by "-b" argument, use fill_buf. 
*/ - uparams.benchmark_cmd[0] = "fill_buf"; - ret = asprintf(&span_str, "%u", DEFAULT_SPAN); - if (ret < 0) - ksft_exit_fail_msg("Out of memory!\n"); - uparams.benchmark_cmd[1] = span_str; - uparams.benchmark_cmd[2] = "1"; - uparams.benchmark_cmd[3] = "0"; - uparams.benchmark_cmd[4] = "false"; - uparams.benchmark_cmd[5] = NULL; - } - ksft_set_plan(tests); for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) run_single_test(resctrl_tests[i], &uparams); - free(span_str); + free(fill_param); ksft_finished(); } diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 5a49f07a6c85..7c08e936572d 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -12,37 +12,14 @@ #define UNCORE_IMC "uncore_imc" #define READ_FILE_NAME "events/cas_count_read" -#define WRITE_FILE_NAME "events/cas_count_write" #define DYN_PMU_PATH "/sys/bus/event_source/devices" #define SCALE 0.00006103515625 #define MAX_IMCS 20 #define MAX_TOKENS 5 -#define READ 0 -#define WRITE 1 -#define CON_MON_MBM_LOCAL_BYTES_PATH \ - "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes" #define CON_MBM_LOCAL_BYTES_PATH \ "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes" -#define MON_MBM_LOCAL_BYTES_PATH \ - "%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes" - -#define MBM_LOCAL_BYTES_PATH \ - "%s/mon_data/mon_L3_%02d/mbm_local_bytes" - -#define CON_MON_LCC_OCCUP_PATH \ - "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define CON_LCC_OCCUP_PATH \ - "%s/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define MON_LCC_OCCUP_PATH \ - "%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define LCC_OCCUP_PATH \ - "%s/mon_data/mon_L3_%02d/llc_occupancy" - struct membw_read_format { __u64 value; /* The value of the event */ __u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ @@ -61,84 +38,71 @@ struct imc_counter_config { static char mbm_total_path[1024]; static int imcs; -static struct imc_counter_config imc_counters_config[MAX_IMCS][2]; +static struct imc_counter_config imc_counters_config[MAX_IMCS]; +static const struct resctrl_test *current_test; -void membw_initialize_perf_event_attr(int i, int j) +static void read_mem_bw_initialize_perf_event_attr(int i) { - memset(&imc_counters_config[i][j].pe, 0, + memset(&imc_counters_config[i].pe, 0, sizeof(struct perf_event_attr)); - imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type; - imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr); - imc_counters_config[i][j].pe.disabled = 1; - imc_counters_config[i][j].pe.inherit = 1; - imc_counters_config[i][j].pe.exclude_guest = 0; - imc_counters_config[i][j].pe.config = - imc_counters_config[i][j].umask << 8 | - imc_counters_config[i][j].event; - imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER; - imc_counters_config[i][j].pe.read_format = + imc_counters_config[i].pe.type = imc_counters_config[i].type; + imc_counters_config[i].pe.size = sizeof(struct perf_event_attr); + imc_counters_config[i].pe.disabled = 1; + imc_counters_config[i].pe.inherit = 1; + imc_counters_config[i].pe.exclude_guest = 0; + imc_counters_config[i].pe.config = + imc_counters_config[i].umask << 8 | + imc_counters_config[i].event; + imc_counters_config[i].pe.sample_type = PERF_SAMPLE_IDENTIFIER; + imc_counters_config[i].pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; } -void membw_ioctl_perf_event_ioc_reset_enable(int i, int j) +static void 
read_mem_bw_ioctl_perf_event_ioc_reset_enable(int i) { - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0); - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_RESET, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_ENABLE, 0); } -void membw_ioctl_perf_event_ioc_disable(int i, int j) +static void read_mem_bw_ioctl_perf_event_ioc_disable(int i) { - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_DISABLE, 0); } /* - * get_event_and_umask: Parse config into event and umask + * get_read_event_and_umask: Parse config into event and umask * @cas_count_cfg: Config * @count: iMC number - * @op: Operation (read/write) */ -void get_event_and_umask(char *cas_count_cfg, int count, bool op) +static void get_read_event_and_umask(char *cas_count_cfg, int count) { char *token[MAX_TOKENS]; int i = 0; - strcat(cas_count_cfg, ","); token[0] = strtok(cas_count_cfg, "=,"); for (i = 1; i < MAX_TOKENS; i++) token[i] = strtok(NULL, "=,"); - for (i = 0; i < MAX_TOKENS; i++) { + for (i = 0; i < MAX_TOKENS - 1; i++) { if (!token[i]) break; - if (strcmp(token[i], "event") == 0) { - if (op == READ) - imc_counters_config[count][READ].event = - strtol(token[i + 1], NULL, 16); - else - imc_counters_config[count][WRITE].event = - strtol(token[i + 1], NULL, 16); - } - if (strcmp(token[i], "umask") == 0) { - if (op == READ) - imc_counters_config[count][READ].umask = - strtol(token[i + 1], NULL, 16); - else - imc_counters_config[count][WRITE].umask = - strtol(token[i + 1], NULL, 16); - } + if (strcmp(token[i], "event") == 0) + imc_counters_config[count].event = strtol(token[i + 1], NULL, 16); + if (strcmp(token[i], "umask") == 0) + imc_counters_config[count].umask = strtol(token[i + 1], NULL, 16); } } -static int open_perf_event(int i, int cpu_no, int j) +static int open_perf_read_event(int i, int cpu_no) { - imc_counters_config[i][j].fd = - perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1, + imc_counters_config[i].fd = + perf_event_open(&imc_counters_config[i].pe, -1, cpu_no, -1, PERF_FLAG_FD_CLOEXEC); - if (imc_counters_config[i][j].fd == -1) { + if (imc_counters_config[i].fd == -1) { fprintf(stderr, "Error opening leader %llx\n", - imc_counters_config[i][j].pe.config); + imc_counters_config[i].pe.config); return -1; } @@ -146,7 +110,7 @@ static int open_perf_event(int i, int cpu_no, int j) return 0; } -/* Get type and config (read and write) of an iMC counter */ +/* Get type and config of an iMC counter's read event. 
*/ static int read_from_imc_dir(char *imc_dir, int count) { char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024]; @@ -160,7 +124,7 @@ static int read_from_imc_dir(char *imc_dir, int count) return -1; } - if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) { + if (fscanf(fp, "%u", &imc_counters_config[count].type) <= 0) { ksft_perror("Could not get iMC type"); fclose(fp); @@ -168,9 +132,6 @@ static int read_from_imc_dir(char *imc_dir, int count) } fclose(fp); - imc_counters_config[count][WRITE].type = - imc_counters_config[count][READ].type; - /* Get read config */ sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME); fp = fopen(imc_counter_cfg, "r"); @@ -179,7 +140,7 @@ static int read_from_imc_dir(char *imc_dir, int count) return -1; } - if (fscanf(fp, "%s", cas_count_cfg) <= 0) { + if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) { ksft_perror("Could not get iMC cas count read"); fclose(fp); @@ -187,34 +148,19 @@ static int read_from_imc_dir(char *imc_dir, int count) } fclose(fp); - get_event_and_umask(cas_count_cfg, count, READ); - - /* Get write config */ - sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME); - fp = fopen(imc_counter_cfg, "r"); - if (!fp) { - ksft_perror("Failed to open iMC config file"); - - return -1; - } - if (fscanf(fp, "%s", cas_count_cfg) <= 0) { - ksft_perror("Could not get iMC cas count write"); - fclose(fp); - - return -1; - } - fclose(fp); - - get_event_and_umask(cas_count_cfg, count, WRITE); + get_read_event_and_umask(cas_count_cfg, count); return 0; } /* * A system can have 'n' number of iMC (Integrated Memory Controller) - * counters, get that 'n'. For each iMC counter get it's type and config. - * Also, each counter has two configs, one for read and the other for write. - * A config again has two parts, event and umask. + * counters, get that 'n'. Discover the properties of the available + * counters in support of needed performance measurement via perf. + * For each iMC counter get it's type and config. Also obtain each + * counter's event and umask for the memory read events that will be + * measured. + * * Enumerate all these details into an array of structures. * * Return: >= 0 on success. < 0 on failure. @@ -275,79 +221,103 @@ static int num_of_imcs(void) return count; } -static int initialize_mem_bw_imc(void) +int initialize_read_mem_bw_imc(void) { - int imc, j; + int imc; imcs = num_of_imcs(); if (imcs <= 0) return imcs; /* Initialize perf_event_attr structures for all iMC's */ - for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) - membw_initialize_perf_event_attr(imc, j); - } + for (imc = 0; imc < imcs; imc++) + read_mem_bw_initialize_perf_event_attr(imc); return 0; } +static void perf_close_imc_read_mem_bw(void) +{ + int mc; + + for (mc = 0; mc < imcs; mc++) { + if (imc_counters_config[mc].fd != -1) + close(imc_counters_config[mc].fd); + } +} + /* - * get_mem_bw_imc: Memory band width as reported by iMC counters - * @cpu_no: CPU number that the benchmark PID is binded to - * @bw_report: Bandwidth report type (reads, writes) - * - * Memory B/W utilized by a process on a socket can be calculated using - * iMC counters. Perf events are used to read these counters. + * perf_open_imc_read_mem_bw - Open perf fds for IMCs + * @cpu_no: CPU number that the benchmark PID is bound to * * Return: = 0 on success. < 0 on failure. 
*/ -static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) +static int perf_open_imc_read_mem_bw(int cpu_no) { - float reads, writes, of_mul_read, of_mul_write; - int imc, j, ret; + int imc, ret; + + for (imc = 0; imc < imcs; imc++) + imc_counters_config[imc].fd = -1; - /* Start all iMC counters to log values (both read and write) */ - reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) { - ret = open_perf_event(imc, cpu_no, j); - if (ret) - return -1; - } - for (j = 0; j < 2; j++) - membw_ioctl_perf_event_ioc_reset_enable(imc, j); + ret = open_perf_read_event(imc, cpu_no); + if (ret) + goto close_fds; } + return 0; + +close_fds: + perf_close_imc_read_mem_bw(); + return -1; +} + +/* + * do_imc_read_mem_bw_test - Perform memory bandwidth test + * + * Runs memory bandwidth test over one second period. Also, handles starting + * and stopping of the IMC perf counters around the test. + */ +static void do_imc_read_mem_bw_test(void) +{ + int imc; + + for (imc = 0; imc < imcs; imc++) + read_mem_bw_ioctl_perf_event_ioc_reset_enable(imc); + sleep(1); - /* Stop counters after a second to get results (both read and write) */ - for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) - membw_ioctl_perf_event_ioc_disable(imc, j); - } + /* Stop counters after a second to get results. */ + for (imc = 0; imc < imcs; imc++) + read_mem_bw_ioctl_perf_event_ioc_disable(imc); +} + +/* + * get_read_mem_bw_imc - Memory read bandwidth as reported by iMC counters + * + * Memory read bandwidth utilized by a process on a socket can be calculated + * using iMC counters' read events. Perf events are used to read these + * counters. + * + * Return: = 0 on success. < 0 on failure. + */ +static int get_read_mem_bw_imc(float *bw_imc) +{ + float reads = 0, of_mul_read = 1; + int imc; /* - * Get results which are stored in struct type imc_counter_config - * Take over flow into consideration before calculating total b/w + * Log read event values from all iMC counters into + * struct imc_counter_config. + * Take overflow into consideration before calculating total bandwidth. 
*/ for (imc = 0; imc < imcs; imc++) { struct imc_counter_config *r = - &imc_counters_config[imc][READ]; - struct imc_counter_config *w = - &imc_counters_config[imc][WRITE]; + &imc_counters_config[imc]; if (read(r->fd, &r->return_value, sizeof(struct membw_read_format)) == -1) { - ksft_perror("Couldn't get read b/w through iMC"); - - return -1; - } - - if (read(w->fd, &w->return_value, - sizeof(struct membw_read_format)) == -1) { - ksft_perror("Couldn't get write bw through iMC"); - + ksft_perror("Couldn't get read bandwidth through iMC"); return -1; } @@ -358,113 +328,52 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) of_mul_read = (float)r_time_enabled / (float)r_time_running; - __u64 w_time_enabled = w->return_value.time_enabled; - __u64 w_time_running = w->return_value.time_running; - - if (w_time_enabled != w_time_running) - of_mul_write = (float)w_time_enabled / - (float)w_time_running; reads += r->return_value.value * of_mul_read * SCALE; - writes += w->return_value.value * of_mul_write * SCALE; - } - - for (imc = 0; imc < imcs; imc++) { - close(imc_counters_config[imc][READ].fd); - close(imc_counters_config[imc][WRITE].fd); - } - - if (strcmp(bw_report, "reads") == 0) { - *bw_imc = reads; - return 0; } - if (strcmp(bw_report, "writes") == 0) { - *bw_imc = writes; - return 0; - } - - *bw_imc = reads + writes; + *bw_imc = reads; return 0; } -void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id) +/* + * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" + * @param: Parameters passed to resctrl_val() + * @domain_id: Domain ID (cache ID; for MB, L3 cache ID) + */ +void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, + int domain_id) { - if (ctrlgrp && mongrp) - sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, ctrlgrp, mongrp, domain_id); - else if (!ctrlgrp && mongrp) - sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - mongrp, domain_id); - else if (ctrlgrp && !mongrp) - sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - ctrlgrp, domain_id); - else if (!ctrlgrp && !mongrp) - sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - domain_id); + sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, + param->ctrlgrp, domain_id); } /* - * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" - * @ctrlgrp: Name of the control monitor group (con_mon grp) - * @mongrp: Name of the monitor group (mon grp) - * @cpu_no: CPU number that the benchmark PID is binded to - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) + * Open file to read MBM local bytes from resctrl FS */ -static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, - int cpu_no, char *resctrl_val) +static FILE *open_mem_bw_resctrl(const char *mbm_bw_file) { - int domain_id; - - if (get_domain_id("MB", cpu_no, &domain_id) < 0) { - ksft_print_msg("Could not get domain ID\n"); - return; - } + FILE *fp; - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) - set_mbm_path(ctrlgrp, mongrp, domain_id); + fp = fopen(mbm_bw_file, "r"); + if (!fp) + ksft_perror("Failed to open total memory bandwidth file"); - if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - if (ctrlgrp) - sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, ctrlgrp, domain_id); - else - sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, domain_id); - } + return fp; } /* * Get MBM Local bytes as reported by resctrl FS - * For MBM, - * 1. 
If con_mon grp and mon grp are given, then read from con_mon grp's mon grp - * 2. If only con_mon grp is given, then read from con_mon grp - * 3. If both are not given, then read from root con_mon grp - * For MBA, - * 1. If con_mon grp is given, then read from it - * 2. If con_mon grp is not given, then read from root con_mon grp */ -static int get_mem_bw_resctrl(unsigned long *mbm_total) +static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total) { - FILE *fp; - - fp = fopen(mbm_total_path, "r"); - if (!fp) { - ksft_perror("Failed to open total bw file"); - - return -1; - } - if (fscanf(fp, "%lu", mbm_total) <= 0) { - ksft_perror("Could not get mbm local bytes"); - fclose(fp); - + if (fscanf(fp, "%lu\n", mbm_total) <= 0) { + ksft_perror("Could not get MBM local bytes"); return -1; } - fclose(fp); - return 0; } -pid_t bm_pid, ppid; +static pid_t bm_pid; void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { @@ -472,7 +381,8 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr) if (bm_pid) kill(bm_pid, SIGKILL); umount_resctrlfs(); - tests_cleanup(); + if (current_test && current_test->cleanup) + current_test->cleanup(); ksft_print_msg("Ending\n\n"); exit(EXIT_SUCCESS); @@ -482,13 +392,14 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr) * Register CTRL-C handler for parent, as it has to kill * child process before exiting. */ -int signal_handler_register(void) +int signal_handler_register(const struct resctrl_test *test) { struct sigaction sigact = {}; int ret = 0; bm_pid = 0; + current_test = test; sigact.sa_sigaction = ctrlc_handler; sigemptyset(&sigact.sa_mask); sigact.sa_flags = SA_SIGINFO; @@ -510,6 +421,7 @@ void signal_handler_unregister(void) { struct sigaction sigact = {}; + current_test = NULL; sigact.sa_handler = SIG_DFL; sigemptyset(&sigact.sa_mask); if (sigaction(SIGINT, &sigact, NULL) || @@ -528,14 +440,14 @@ void signal_handler_unregister(void) * * Return: 0 on success, < 0 on error. 
*/ -static int print_results_bw(char *filename, int bm_pid, float bw_imc, +static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc, unsigned long bw_resc) { unsigned long diff = fabs(bw_imc - bw_resc); FILE *fp; if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { - printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc); + printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc); printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff); } else { fp = fopen(filename, "a"); @@ -545,7 +457,7 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return -1; } if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n", - bm_pid, bw_imc, bw_resc, diff) <= 0) { + (int)bm_pid, bw_imc, bw_resc, diff) <= 0) { ksft_print_msg("Could not log results\n"); fclose(fp); @@ -557,128 +469,63 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return 0; } -static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num) -{ - if (strlen(ctrlgrp) && strlen(mongrp)) - sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, - ctrlgrp, mongrp, sock_num); - else if (!strlen(ctrlgrp) && strlen(mongrp)) - sprintf(llc_occup_path, MON_LCC_OCCUP_PATH, RESCTRL_PATH, - mongrp, sock_num); - else if (strlen(ctrlgrp) && !strlen(mongrp)) - sprintf(llc_occup_path, CON_LCC_OCCUP_PATH, RESCTRL_PATH, - ctrlgrp, sock_num); - else if (!strlen(ctrlgrp) && !strlen(mongrp)) - sprintf(llc_occup_path, LCC_OCCUP_PATH, RESCTRL_PATH, sock_num); -} - /* - * initialize_llc_occu_resctrl: Appropriately populate "llc_occup_path" - * @ctrlgrp: Name of the control monitor group (con_mon grp) - * @mongrp: Name of the monitor group (mon grp) - * @cpu_no: CPU number that the benchmark PID is binded to - * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc) + * measure_read_mem_bw - Measures read memory bandwidth numbers while benchmark runs + * @uparams: User supplied parameters + * @param: Parameters passed to resctrl_val() + * @bm_pid: PID that runs the benchmark + * + * Measure memory bandwidth from resctrl and from another source which is + * perf imc value or could be something else if perf imc event is not + * available. Compare the two values to validate resctrl value. It takes + * 1 sec to measure the data. + * resctrl does not distinguish between read and write operations so + * its data includes all memory operations. */ -static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, - int cpu_no, char *resctrl_val) +int measure_read_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) { - int domain_id; - - if (get_domain_id("L3", cpu_no, &domain_id) < 0) { - ksft_print_msg("Could not get domain ID\n"); - return; - } - - if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) - set_cmt_path(ctrlgrp, mongrp, domain_id); -} - -static int measure_vals(const struct user_params *uparams, - struct resctrl_val_param *param, - unsigned long *bw_resc_start) -{ - unsigned long bw_resc, bw_resc_end; + unsigned long bw_resc, bw_resc_start, bw_resc_end; + FILE *mem_bw_fp; float bw_imc; int ret; - /* - * Measure memory bandwidth from resctrl and from - * another source which is perf imc value or could - * be something else if perf imc event is not available. - * Compare the two values to validate resctrl value. - * It takes 1sec to measure the data. 
- */ - ret = get_mem_bw_imc(uparams->cpu, param->bw_report, &bw_imc); - if (ret < 0) - return ret; + mem_bw_fp = open_mem_bw_resctrl(mbm_total_path); + if (!mem_bw_fp) + return -1; - ret = get_mem_bw_resctrl(&bw_resc_end); + ret = perf_open_imc_read_mem_bw(uparams->cpu); if (ret < 0) - return ret; + goto close_fp; - bw_resc = (bw_resc_end - *bw_resc_start) / MB; - ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); - if (ret) - return ret; + ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start); + if (ret < 0) + goto close_imc; - *bw_resc_start = bw_resc_end; + rewind(mem_bw_fp); - return 0; -} + do_imc_read_mem_bw_test(); -/* - * run_benchmark - Run a specified benchmark or fill_buf (default benchmark) - * in specified signal. Direct benchmark stdio to /dev/null. - * @signum: signal number - * @info: signal info - * @ucontext: user context in signal handling - */ -static void run_benchmark(int signum, siginfo_t *info, void *ucontext) -{ - int operation, ret, memflush; - char **benchmark_cmd; - size_t span; - bool once; - FILE *fp; + ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end); + if (ret < 0) + goto close_imc; - benchmark_cmd = info->si_ptr; + ret = get_read_mem_bw_imc(&bw_imc); + if (ret < 0) + goto close_imc; - /* - * Direct stdio of child to /dev/null, so that only parent writes to - * stdio (console) - */ - fp = freopen("/dev/null", "w", stdout); - if (!fp) { - ksft_perror("Unable to direct benchmark status to /dev/null"); - PARENT_EXIT(); - } + perf_close_imc_read_mem_bw(); + fclose(mem_bw_fp); - if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { - /* Execute default fill_buf benchmark */ - span = strtoul(benchmark_cmd[1], NULL, 10); - memflush = atoi(benchmark_cmd[2]); - operation = atoi(benchmark_cmd[3]); - if (!strcmp(benchmark_cmd[4], "true")) { - once = true; - } else if (!strcmp(benchmark_cmd[4], "false")) { - once = false; - } else { - ksft_print_msg("Invalid once parameter\n"); - PARENT_EXIT(); - } + bw_resc = (bw_resc_end - bw_resc_start) / MB; - if (run_fill_buf(span, memflush, operation, once)) - fprintf(stderr, "Error in running fill buffer\n"); - } else { - /* Execute specified benchmark */ - ret = execvp(benchmark_cmd[0], benchmark_cmd); - if (ret) - ksft_perror("execvp"); - } + return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); - fclose(stdout); - ksft_print_msg("Unable to run specified benchmark\n"); - PARENT_EXIT(); +close_imc: + perf_close_imc_read_mem_bw(); +close_fp: + fclose(mem_bw_fp); + return ret; } /* @@ -686,145 +533,88 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext) * the benchmark * @test: test information structure * @uparams: user supplied parameters - * @benchmark_cmd: benchmark command and its arguments * @param: parameters passed to resctrl_val() * * Return: 0 when the test was run, < 0 on error. 
*/ int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, - const char * const *benchmark_cmd, struct resctrl_val_param *param) { - char *resctrl_val = param->resctrl_val; - unsigned long bw_resc_start = 0; - struct sigaction sigact; - int ret = 0, pipefd[2]; - char pipe_message = 0; - union sigval value; + unsigned char *buf = NULL; + cpu_set_t old_affinity; + int domain_id; + int ret = 0; + pid_t ppid; if (strcmp(param->filename, "") == 0) sprintf(param->filename, "stdio"); - if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) || - !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { - ret = validate_bw_report_request(param->bw_report); - if (ret) - return ret; + ret = get_domain_id(test->resource, uparams->cpu, &domain_id); + if (ret < 0) { + ksft_print_msg("Could not get domain ID\n"); + return ret; } - /* - * If benchmark wasn't successfully started by child, then child should - * kill parent, so save parent's pid - */ ppid = getpid(); - if (pipe(pipefd)) { - ksft_perror("Unable to create pipe"); + /* Taskset test to specified CPU. */ + ret = taskset_benchmark(ppid, uparams->cpu, &old_affinity); + if (ret) + return ret; - return -1; + /* Write test to specified control & monitoring group in resctrl FS. */ + ret = write_bm_pid_to_resctrl(ppid, param->ctrlgrp, param->mongrp); + if (ret) + goto reset_affinity; + + if (param->init) { + ret = param->init(param, domain_id); + if (ret) + goto reset_affinity; } /* - * Fork to start benchmark, save child's pid so that it can be killed - * when needed + * If not running user provided benchmark, run the default + * "fill_buf". First phase of "fill_buf" is to prepare the + * buffer that the benchmark will operate on. No measurements + * are needed during this phase and prepared memory will be + * passed to next part of benchmark via copy-on-write thus + * no impact on the benchmark that relies on reading from + * memory only. */ + if (param->fill_buf) { + buf = alloc_buffer(param->fill_buf->buf_size, + param->fill_buf->memflush); + if (!buf) { + ret = -ENOMEM; + goto reset_affinity; + } + } + fflush(stdout); bm_pid = fork(); if (bm_pid == -1) { + ret = -errno; ksft_perror("Unable to fork"); - - return -1; - } - - if (bm_pid == 0) { - /* - * Mask all signals except SIGUSR1, parent uses SIGUSR1 to - * start benchmark - */ - sigfillset(&sigact.sa_mask); - sigdelset(&sigact.sa_mask, SIGUSR1); - - sigact.sa_sigaction = run_benchmark; - sigact.sa_flags = SA_SIGINFO; - - /* Register for "SIGUSR1" signal from parent */ - if (sigaction(SIGUSR1, &sigact, NULL)) { - ksft_perror("Can't register child for signal"); - PARENT_EXIT(); - } - - /* Tell parent that child is ready */ - close(pipefd[0]); - pipe_message = 1; - if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) < - sizeof(pipe_message)) { - ksft_perror("Failed signaling parent process"); - close(pipefd[1]); - return -1; - } - close(pipefd[1]); - - /* Suspend child until delivery of "SIGUSR1" from parent */ - sigsuspend(&sigact.sa_mask); - - ksft_perror("Child is done"); - PARENT_EXIT(); + goto free_buf; } - ksft_print_msg("Benchmark PID: %d\n", bm_pid); - /* - * The cast removes constness but nothing mutates benchmark_cmd within - * the context of this process. At the receiving process, it becomes - * argv, which is mutable, on exec() but that's after fork() so it - * doesn't matter for the process running the tests. + * What needs to be measured runs in separate process until + * terminated. 
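A compressed sketch of the prepare-then-fork pattern described in the comment above: the parent faults the buffer in once, and because the forked child only reads it, copy-on-write never copies a page during the measured phase. The read_loop() workload here is a hypothetical stand-in for the selftest's fill_cache_read():

#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

static void read_loop(const unsigned char *buf, size_t size)
{
	volatile unsigned char sink = 0;
	size_t i;

	for (i = 0; i < size; i += 64)	/* touch one byte per cache line */
		sink = buf[i];
	(void)sink;
}

int main(void)
{
	size_t size = 128 * 1024 * 1024;
	unsigned char *buf = malloc(size);
	pid_t pid;

	if (!buf)
		return 1;
	memset(buf, 0x5a, size);	/* fault all pages in before fork() */

	pid = fork();
	if (pid < 0)
		return 1;
	if (pid == 0)			/* child: read-only workload */
		for (;;)
			read_loop(buf, size);

	sleep(1);			/* parent: measurement window */
	kill(pid, SIGKILL);
	waitpid(pid, NULL, 0);
	free(buf);
	return 0;
}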
*/ - value.sival_ptr = (void *)benchmark_cmd; - - /* Taskset benchmark to specified cpu */ - ret = taskset_benchmark(bm_pid, uparams->cpu, NULL); - if (ret) - goto out; - - /* Write benchmark to specified control&monitoring grp in resctrl FS */ - ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, - resctrl_val); - if (ret) - goto out; - - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || - !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - ret = initialize_mem_bw_imc(); - if (ret) - goto out; - - initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, - uparams->cpu, resctrl_val); - } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) - initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp, - uparams->cpu, resctrl_val); - - /* Parent waits for child to be ready. */ - close(pipefd[1]); - while (pipe_message != 1) { - if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) < - sizeof(pipe_message)) { - ksft_perror("Failed reading message from child process"); - close(pipefd[0]); - goto out; - } + if (bm_pid == 0) { + if (param->fill_buf) + fill_cache_read(buf, param->fill_buf->buf_size, false); + else if (uparams->benchmark_cmd[0]) + execvp(uparams->benchmark_cmd[0], (char **)uparams->benchmark_cmd); + exit(EXIT_SUCCESS); } - close(pipefd[0]); - /* Signal child to start benchmark */ - if (sigqueue(bm_pid, SIGUSR1, value) == -1) { - ksft_perror("sigqueue SIGUSR1 to child"); - ret = -1; - goto out; - } + ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); - /* Give benchmark enough time to fully run */ + /* Give benchmark enough time to fully run. */ sleep(1); /* Test runs until the callback setup() tells the test to stop. */ @@ -837,21 +627,15 @@ int resctrl_val(const struct resctrl_test *test, if (ret < 0) break; - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || - !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - ret = measure_vals(uparams, param, &bw_resc_start); - if (ret) - break; - } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { - sleep(1); - ret = measure_llc_resctrl(param->filename, bm_pid); - if (ret) - break; - } + ret = param->measure(uparams, param, bm_pid); + if (ret) + break; } -out: kill(bm_pid, SIGKILL); - +free_buf: + free(buf); +reset_affinity: + taskset_restore(ppid, &old_affinity); return ret; } diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 1cade75176eb..195f04c4d158 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -13,6 +13,8 @@ #include "resctrl.h" +int snc_unreliable; + static int find_resctrl_mount(char *buffer) { FILE *mounts; @@ -157,6 +159,98 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id) } /* + * Count number of CPUs in a /sys bitmap + */ +static unsigned int count_sys_bitmap_bits(char *name) +{ + FILE *fp = fopen(name, "r"); + int count = 0, c; + + if (!fp) + return 0; + + while ((c = fgetc(fp)) != EOF) { + if (!isxdigit(c)) + continue; + switch (c) { + case 'f': + count++; + fallthrough; + case '7': case 'b': case 'd': case 'e': + count++; + fallthrough; + case '3': case '5': case '6': case '9': case 'a': case 'c': + count++; + fallthrough; + case '1': case '2': case '4': case '8': + count++; + break; + } + } + fclose(fp); + + return count; +} + +static bool cpus_offline_empty(void) +{ + char offline_cpus_str[64]; + FILE *fp; + + fp = fopen("/sys/devices/system/cpu/offline", "r"); + if (!fp) { + ksft_perror("Could not open /sys/devices/system/cpu/offline"); 
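The fallthrough switch in count_sys_bitmap_bits() above adds the population count of each hex digit in a /sys bitmap such as a cpumap. An equivalent formulation, shown only for comparison, uses a per-nibble popcount (__builtin_popcount() is a GCC/Clang builtin, which is what the selftests are built with):

#include <ctype.h>
#include <stdio.h>

static unsigned int count_hex_bits(const char *s)
{
	unsigned int count = 0;

	for (; *s; s++) {
		unsigned char c = *s;

		if (!isxdigit(c))
			continue;	/* skip ',' separators and '\n' */
		if (isdigit(c))
			count += __builtin_popcount(c - '0');
		else
			count += __builtin_popcount(tolower(c) - 'a' + 10);
	}
	return count;
}

int main(void)
{
	/* "0000ffff" has 16 bits set. */
	printf("%u\n", count_hex_bits("00000000,0000ffff"));
	return 0;
}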
+ return 0; + } + + if (fscanf(fp, "%63s", offline_cpus_str) < 0) { + if (!errno) { + fclose(fp); + return 1; + } + ksft_perror("Could not read /sys/devices/system/cpu/offline"); + } + + fclose(fp); + + return 0; +} + +/* + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0. + * If any CPUs are offline declare the detection as unreliable. + */ +int snc_nodes_per_l3_cache(void) +{ + int node_cpus, cache_cpus; + static int snc_mode; + + if (!snc_mode) { + snc_mode = 1; + if (!cpus_offline_empty()) { + ksft_print_msg("Runtime SNC detection unreliable due to offline CPUs.\n"); + ksft_print_msg("Setting SNC mode to disabled.\n"); + snc_unreliable = 1; + return snc_mode; + } + node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap"); + cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map"); + + if (!node_cpus || !cache_cpus) { + ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n"); + snc_unreliable = 1; + return snc_mode; + } + snc_mode = cache_cpus / node_cpus; + + if (snc_mode > 1) + ksft_print_msg("SNC-%d mode discovered.\n", snc_mode); + } + + return snc_mode; +} + +/* * get_cache_size - Get cache size for a specified CPU * @cpu_no: CPU number * @cache_type: Cache level L2/L3 @@ -182,7 +276,7 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size return -1; } - if (fscanf(fp, "%s", cache_str) <= 0) { + if (fscanf(fp, "%63s", cache_str) <= 0) { ksft_perror("Could not get cache_size"); fclose(fp); @@ -211,6 +305,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size break; } + /* + * The amount of cache represented by each bit in the masks + * in the schemata file is reduced by a factor equal to SNC + * nodes per L3 cache. + * E.g. on a SNC-2 system with a 100MB L3 cache a test that + * allocates memory from its local SNC node (default behavior + * without using libnuma) will only see 50 MB llc_occupancy + * with a fully populated L3 mask in the schemata file. + */ + if (cache_num == 3) + *cache_size /= snc_nodes_per_l3_cache(); return 0; } @@ -456,6 +561,9 @@ int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity) * @grp: Full path and name of the group * @parent_grp: Full path and name of the parent group * + * Creates a group @grp_name if it does not exist yet. If @grp_name is NULL, + * it is interpreted as the root group which always results in success. + * * Return: 0 on success, < 0 on error. */ static int create_grp(const char *grp_name, char *grp, const char *parent_grp) @@ -464,12 +572,7 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp) struct dirent *ep; DIR *dp; - /* - * At this point, we are guaranteed to have resctrl FS mounted and if - * length of grp_name == 0, it means, user wants to use root con_mon - * grp, so do nothing - */ - if (strlen(grp_name) == 0) + if (!grp_name) return 0; /* Check if requested grp exists or not */ @@ -508,7 +611,7 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) return -1; } - if (fprintf(fp, "%d\n", pid) < 0) { + if (fprintf(fp, "%d\n", (int)pid) < 0) { ksft_print_msg("Failed to write pid to tasks file\n"); fclose(fp); @@ -524,7 +627,6 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) * @bm_pid: PID that should be written * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. 
etc) * * If a con_mon grp is requested, create it and write pid to it, otherwise * write pid to root con_mon grp. @@ -534,14 +636,13 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) * * Return: 0 on success, < 0 on error. */ -int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, - char *resctrl_val) +int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp) { char controlgroup[128], monitorgroup[512], monitorgroup_p[256]; char tasks[1024]; int ret = 0; - if (strlen(ctrlgrp)) + if (ctrlgrp) sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp); else sprintf(controlgroup, "%s", RESCTRL_PATH); @@ -555,22 +656,19 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, if (ret) goto out; - /* Create mon grp and write pid into it for "mbm" and "cmt" test */ - if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) || - !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { - if (strlen(mongrp)) { - sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); - sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); - ret = create_grp(mongrp, monitorgroup, monitorgroup_p); - if (ret) - goto out; - - sprintf(tasks, "%s/mon_groups/%s/tasks", - controlgroup, mongrp); - ret = write_pid_to_tasks(tasks, bm_pid); - if (ret) - goto out; - } + /* Create monitor group and write pid into if it is used */ + if (mongrp) { + sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); + sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); + ret = create_grp(mongrp, monitorgroup, monitorgroup_p); + if (ret) + goto out; + + sprintf(tasks, "%s/mon_groups/%s/tasks", + controlgroup, mongrp); + ret = write_pid_to_tasks(tasks, bm_pid); + if (ret) + goto out; } out: @@ -593,7 +691,8 @@ out: * * Return: 0 on success, < 0 on error. */ -int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource) +int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, + const char *resource) { char controlgroup[1024], reason[128], schema[1024] = {}; int domain_id, fd, schema_len, ret = 0; @@ -611,7 +710,7 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resour goto out; } - if (strlen(ctrlgrp) != 0) + if (ctrlgrp) sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp); else sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); @@ -837,24 +936,6 @@ int filter_dmesg(void) return 0; } -int validate_bw_report_request(char *bw_report) -{ - if (strcmp(bw_report, "reads") == 0) - return 0; - if (strcmp(bw_report, "writes") == 0) - return 0; - if (strcmp(bw_report, "nt-writes") == 0) { - strcpy(bw_report, "writes"); - return 0; - } - if (strcmp(bw_report, "total") == 0) - return 0; - - fprintf(stderr, "Requested iMC B/W report type unavailable\n"); - - return -1; -} - int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { @@ -876,3 +957,35 @@ unsigned int count_bits(unsigned long n) return count; } + +/** + * snc_kernel_support - Check for existence of mon_sub_L3_00 file that indicates + * SNC resctrl support on the kernel side. + * + * Return: 0 if not supported, 1 if SNC is disabled or SNC discovery is + * unreliable or SNC is both enabled and supported. + */ +int snc_kernel_support(void) +{ + char node_path[PATH_MAX]; + struct stat statbuf; + int ret; + + ret = snc_nodes_per_l3_cache(); + /* + * If SNC is disabled then its kernel support isn't important. If SNC + * got disabled because the discovery process was unreliable the + * snc_unreliable variable was set. 
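write_bm_pid_to_resctrl() above boils down to building the right tasks path and appending the PID to it; the group directories themselves are created beforehand by create_grp(). A minimal sketch of that final step, using hypothetical group names "c1" and "m1":

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static int move_pid_to_group(const char *tasks_path, pid_t pid)
{
	FILE *fp = fopen(tasks_path, "w");

	if (!fp)
		return -1;
	if (fprintf(fp, "%d\n", (int)pid) < 0) {
		fclose(fp);
		return -1;
	}
	fclose(fp);
	return 0;
}

int main(void)
{
	/* Example paths; the real code mkdir()s the groups first. */
	if (move_pid_to_group("/sys/fs/resctrl/c1/tasks", getpid()))
		return 1;
	if (move_pid_to_group("/sys/fs/resctrl/c1/mon_groups/m1/tasks", getpid()))
		return 1;
	return 0;
}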
It can be used to verify the SNC + * discovery reliability elsewhere in the selftest. + */ + if (ret == 1) + return ret; + + snprintf(node_path, sizeof(node_path), "%s/%s", RESCTRL_PATH, + "mon_data/mon_L3_00/mon_sub_L3_00"); + + if (!stat(node_path, &statbuf)) + return 1; + + return 0; +} |
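snc_kernel_support() above reduces to an existence check: if the kernel created mon_sub_L3_00 monitoring directories, it has SNC-aware resctrl support. A standalone sketch of that probe, using the same path the selftest checks:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	const char *path = "/sys/fs/resctrl/mon_data/mon_L3_00/mon_sub_L3_00";
	struct stat st;

	if (!stat(path, &st))
		printf("SNC monitoring directories present\n");
	else
		printf("no SNC monitoring directories (or resctrl not mounted)\n");

	return 0;
}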