Diffstat (limited to 'tools/testing/selftests/resctrl')
-rw-r--r--   tools/testing/selftests/resctrl/.gitignore      |   2
-rw-r--r--   tools/testing/selftests/resctrl/Makefile        |  13
-rw-r--r--   tools/testing/selftests/resctrl/README          |  78
-rw-r--r--   tools/testing/selftests/resctrl/cache.c         | 189
-rw-r--r--   tools/testing/selftests/resctrl/cat_test.c      | 402
-rw-r--r--   tools/testing/selftests/resctrl/cmt_test.c      | 190
-rw-r--r--   tools/testing/selftests/resctrl/config          |   2
-rw-r--r--   tools/testing/selftests/resctrl/fill_buf.c      | 144
-rw-r--r--   tools/testing/selftests/resctrl/mba_test.c      | 223
-rw-r--r--   tools/testing/selftests/resctrl/mbm_test.c      | 182
-rw-r--r--   tools/testing/selftests/resctrl/resctrl.h       | 248
-rw-r--r--   tools/testing/selftests/resctrl/resctrl_tests.c | 350
-rw-r--r--   tools/testing/selftests/resctrl/resctrl_val.c   | 641
-rw-r--r--   tools/testing/selftests/resctrl/resctrlfs.c     | 991
-rw-r--r--   tools/testing/selftests/resctrl/settings        |   3
15 files changed, 3658 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/resctrl/.gitignore b/tools/testing/selftests/resctrl/.gitignore
new file mode 100644
index 000000000000..ab68442b6bc8
--- /dev/null
+++ b/tools/testing/selftests/resctrl/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+resctrl_tests
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
new file mode 100644
index 000000000000..984534cfbf1b
--- /dev/null
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
+CFLAGS += $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := resctrl_tests
+
+LOCAL_HDRS += $(wildcard *.h)
+
+include ../lib.mk
+CFLAGS += -I$(top_srcdir)/tools/include
+
+$(OUTPUT)/resctrl_tests: $(wildcard *.c)
diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
new file mode 100644
index 000000000000..8d11ce7c2ee5
--- /dev/null
+++ b/tools/testing/selftests/resctrl/README
@@ -0,0 +1,78 @@
+resctrl_tests - resctrl file system test suite
+
+Authors:
+	Fenghua Yu <fenghua.yu@intel.com>
+	Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
+
+resctrl_tests tests various resctrl functionalities and interfaces, covering
+both software and hardware.
+
+Currently it supports the Memory Bandwidth Monitoring test and the Memory
+Bandwidth Allocation test on Intel RDT hardware. More tests will be added in
+the future, and the test suite can be extended to cover AMD QoS and ARM MPAM
+hardware as well.
+
+resctrl_tests can be run with or without the kselftest framework.
+
+WITH KSELFTEST FRAMEWORK
+========================
+
+BUILD
+-----
+
+Build the executable "resctrl_tests" from the top level directory of the
+kernel source:
+  $ make -C tools/testing/selftests TARGETS=resctrl
+
+RUN
+---
+
+Run resctrl_tests as sudo or root since the test needs to mount the resctrl
+file system and change contents in the file system.
+Using the kselftest framework will run all supported tests within
+resctrl_tests:
+
+  $ sudo make -C tools/testing/selftests TARGETS=resctrl run_tests
+
+More details about the kselftest framework can be found in
+Documentation/dev-tools/kselftest.rst.
+
+WITHOUT KSELFTEST FRAMEWORK
+===========================
+
+BUILD
+-----
+
+Build the executable "resctrl_tests" from this directory
+(tools/testing/selftests/resctrl/):
+  $ make
+
+RUN
+---
+
+Run resctrl_tests as sudo or root since the test needs to mount the resctrl
+file system and change contents in the file system.
+Executing the test without any parameter will run all supported tests:
+
+  $ sudo ./resctrl_tests
+
+OVERVIEW OF EXECUTION
+=====================
+
+A test case has four stages:
+
+  - setup: mount the resctrl file system, create a group, set up the
+    schemata, move test process pids to tasks, start the benchmark.
+  - execute: let the benchmark run.
+  - verify: get resctrl data and verify the data against another source,
+    e.g. a perf event.
+  - teardown: umount resctrl and clear temporary files.
+
+ARGUMENTS
+=========
+
+Parameter '-h' shows usage information.
+
+usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits]
+	-b benchmark_cmd [options]: run the specified benchmark for MBM, MBA and CMT;
+	   the default benchmark is the builtin fill_buf
+	-t test list: run tests specified in the test list, e.g. -t mbm,mba,cmt,cat
+	-n no_of_bits: run cache tests using the specified number of bits in the cache bit mask
+	-p cpu_no: specify the CPU number to run the test.
1 is default + -h: help diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c new file mode 100644 index 000000000000..1ff1104e6575 --- /dev/null +++ b/tools/testing/selftests/resctrl/cache.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdint.h> +#include "resctrl.h" + +char llc_occup_path[1024]; + +void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config) +{ + memset(pea, 0, sizeof(*pea)); + pea->type = PERF_TYPE_HARDWARE; + pea->size = sizeof(*pea); + pea->read_format = PERF_FORMAT_GROUP; + pea->exclude_kernel = 1; + pea->exclude_hv = 1; + pea->exclude_idle = 1; + pea->exclude_callchain_kernel = 1; + pea->inherit = 1; + pea->exclude_guest = 1; + pea->disabled = 1; + pea->config = config; +} + +/* Start counters to log values */ +int perf_event_reset_enable(int pe_fd) +{ + int ret; + + ret = ioctl(pe_fd, PERF_EVENT_IOC_RESET, 0); + if (ret < 0) + return ret; + + ret = ioctl(pe_fd, PERF_EVENT_IOC_ENABLE, 0); + if (ret < 0) + return ret; + + return 0; +} + +void perf_event_initialize_read_format(struct perf_event_read *pe_read) +{ + memset(pe_read, 0, sizeof(*pe_read)); + pe_read->nr = 1; +} + +int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no) +{ + int pe_fd; + + pe_fd = perf_event_open(pea, pid, cpu_no, -1, PERF_FLAG_FD_CLOEXEC); + if (pe_fd == -1) { + ksft_perror("Error opening leader"); + return -1; + } + + perf_event_reset_enable(pe_fd); + + return pe_fd; +} + +/* + * Get LLC Occupancy as reported by RESCTRL FS + * For CMT, + * 1. If con_mon grp and mon grp given, then read from mon grp in + * con_mon grp + * 2. If only con_mon grp given, then read from con_mon grp + * 3. If both not given, then read from root con_mon grp + * For CAT, + * 1. If con_mon grp given, then read from it + * 2. If con_mon grp not given, then read from root con_mon grp + * + * Return: =0 on success. <0 on failure. + */ +static int get_llc_occu_resctrl(unsigned long *llc_occupancy) +{ + FILE *fp; + + fp = fopen(llc_occup_path, "r"); + if (!fp) { + ksft_perror("Failed to open results file"); + + return -1; + } + if (fscanf(fp, "%lu", llc_occupancy) <= 0) { + ksft_perror("Could not get llc occupancy"); + fclose(fp); + + return -1; + } + fclose(fp); + + return 0; +} + +/* + * print_results_cache: the cache results are stored in a file + * @filename: file that stores the results + * @bm_pid: child pid that runs benchmark + * @llc_value: perf miss value / + * llc occupancy value reported by resctrl FS + * + * Return: 0 on success, < 0 on error. + */ +static int print_results_cache(const char *filename, pid_t bm_pid, __u64 llc_value) +{ + FILE *fp; + + if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { + printf("Pid: %d \t LLC_value: %llu\n", (int)bm_pid, llc_value); + } else { + fp = fopen(filename, "a"); + if (!fp) { + ksft_perror("Cannot open results file"); + + return -1; + } + fprintf(fp, "Pid: %d \t llc_value: %llu\n", (int)bm_pid, llc_value); + fclose(fp); + } + + return 0; +} + +/* + * perf_event_measure - Measure perf events + * @filename: Filename for writing the results + * @bm_pid: PID that runs the benchmark + * + * Measures perf events (e.g., cache misses) and writes the results into + * @filename. @bm_pid is written to the results file along with the measured + * value. + * + * Return: =0 on success. <0 on failure. 
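The helpers above wrap a fairly standard perf_event_open() sequence: fill in the attr, open the fd, reset and enable around the measured region, then disable and read. As a standalone illustration (a sketch, not part of the patch; the event choice mirrors PERF_COUNT_HW_CACHE_MISSES used by the CAT test, and error handling is reduced to a minimum), the same sequence can be exercised like this:

        /* Minimal sketch of the cache.c measurement sequence, assuming
         * Linux with working perf support for this hardware event.
         */
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <sys/syscall.h>
        #include <linux/perf_event.h>

        static int open_cache_miss_counter(void)
        {
                struct perf_event_attr pea;

                memset(&pea, 0, sizeof(pea));
                pea.type = PERF_TYPE_HARDWARE;
                pea.size = sizeof(pea);
                pea.config = PERF_COUNT_HW_CACHE_MISSES;
                pea.disabled = 1;
                pea.exclude_kernel = 1;

                /* Measure this process on any CPU. */
                return syscall(__NR_perf_event_open, &pea, 0, -1, -1, 0);
        }

        int main(void)
        {
                long long misses;
                int fd = open_cache_miss_counter();

                if (fd < 0) {
                        perror("perf_event_open");
                        return 1;
                }

                ioctl(fd, PERF_EVENT_IOC_RESET, 0);
                ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
                /* ... workload under measurement would run here ... */
                ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

                /* Without PERF_FORMAT_GROUP, read() returns one u64 count. */
                if (read(fd, &misses, sizeof(misses)) == sizeof(misses))
                        printf("cache misses: %lld\n", misses);
                close(fd);
                return 0;
        }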
+ */ +int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, + const char *filename, pid_t bm_pid) +{ + int ret; + + /* Stop counters after one span to get miss rate */ + ret = ioctl(pe_fd, PERF_EVENT_IOC_DISABLE, 0); + if (ret < 0) + return ret; + + ret = read(pe_fd, pe_read, sizeof(*pe_read)); + if (ret == -1) { + ksft_perror("Could not get perf value"); + return -1; + } + + return print_results_cache(filename, bm_pid, pe_read->values[0].value); +} + +/* + * measure_llc_resctrl - Measure resctrl LLC value from resctrl + * @filename: Filename for writing the results + * @bm_pid: PID that runs the benchmark + * + * Measures LLC occupancy from resctrl and writes the results into @filename. + * @bm_pid is written to the results file along with the measured value. + * + * Return: =0 on success. <0 on failure. + */ +int measure_llc_resctrl(const char *filename, pid_t bm_pid) +{ + unsigned long llc_occu_resc = 0; + int ret; + + ret = get_llc_occu_resctrl(&llc_occu_resc); + if (ret < 0) + return ret; + + return print_results_cache(filename, bm_pid, llc_occu_resc); +} + +/* + * show_cache_info - Show generic cache test information + * @no_of_bits: Number of bits + * @avg_llc_val: Average of LLC cache result data + * @cache_span: Cache span + * @lines: @cache_span in lines or bytes + */ +void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines) +{ + ksft_print_msg("Number of bits: %d\n", no_of_bits); + ksft_print_msg("Average LLC val: %llu\n", avg_llc_val); + ksft_print_msg("Cache span (%s): %zu\n", lines ? "lines" : "bytes", + cache_span); +} diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c new file mode 100644 index 000000000000..94cfdba5308d --- /dev/null +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cache Allocation Technology (CAT) test + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ +#include "resctrl.h" +#include <unistd.h> + +#define RESULT_FILE_NAME "result_cat" +#define NUM_OF_RUNS 5 + +/* + * Minimum difference in LLC misses between a test with n+1 bits CBM to the + * test with n bits is MIN_DIFF_PERCENT_PER_BIT * (n - 1). With e.g. 5 vs 4 + * bits in the CBM mask, the minimum difference must be at least + * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent. + * + * The relationship between number of used CBM bits and difference in LLC + * misses is not expected to be linear. With a small number of bits, the + * margin is smaller than with larger number of bits. For selftest purposes, + * however, linear approach is enough because ultimately only pass/fail + * decision has to be made and distinction between strong and stronger + * signal is irrelevant. + */ +#define MIN_DIFF_PERCENT_PER_BIT 1UL + +static int show_results_info(__u64 sum_llc_val, int no_of_bits, + unsigned long cache_span, + unsigned long min_diff_percent, + unsigned long num_of_runs, bool platform, + __s64 *prev_avg_llc_val) +{ + __u64 avg_llc_val = 0; + float avg_diff; + int ret = 0; + + avg_llc_val = sum_llc_val / num_of_runs; + if (*prev_avg_llc_val) { + float delta = (__s64)(avg_llc_val - *prev_avg_llc_val); + + avg_diff = delta / *prev_avg_llc_val; + ret = platform && (avg_diff * 100) < (float)min_diff_percent; + + ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n", + ret ? 
"Fail:" : "Pass:", (float)min_diff_percent); + + ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100); + } + *prev_avg_llc_val = avg_llc_val; + + show_cache_info(no_of_bits, avg_llc_val, cache_span, true); + + return ret; +} + +/* Remove the highest bit from CBM */ +static unsigned long next_mask(unsigned long current_mask) +{ + return current_mask & (current_mask >> 1); +} + +static int check_results(struct resctrl_val_param *param, const char *cache_type, + unsigned long cache_total_size, unsigned long full_cache_mask, + unsigned long current_mask) +{ + char *token_array[8], temp[512]; + __u64 sum_llc_perf_miss = 0; + __s64 prev_avg_llc_val = 0; + unsigned long alloc_size; + int runs = 0; + int fail = 0; + int ret; + FILE *fp; + + ksft_print_msg("Checking for pass/fail\n"); + fp = fopen(param->filename, "r"); + if (!fp) { + ksft_perror("Cannot open file"); + + return -1; + } + + while (fgets(temp, sizeof(temp), fp)) { + char *token = strtok(temp, ":\t"); + int fields = 0; + int bits; + + while (token) { + token_array[fields++] = token; + token = strtok(NULL, ":\t"); + } + + sum_llc_perf_miss += strtoull(token_array[3], NULL, 0); + runs++; + + if (runs < NUM_OF_RUNS) + continue; + + if (!current_mask) { + ksft_print_msg("Unexpected empty cache mask\n"); + break; + } + + alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask); + + bits = count_bits(current_mask); + + ret = show_results_info(sum_llc_perf_miss, bits, + alloc_size / 64, + MIN_DIFF_PERCENT_PER_BIT * (bits - 1), + runs, get_vendor() == ARCH_INTEL, + &prev_avg_llc_val); + if (ret) + fail = 1; + + runs = 0; + sum_llc_perf_miss = 0; + current_mask = next_mask(current_mask); + } + + fclose(fp); + + return fail; +} + +static void cat_test_cleanup(void) +{ + remove(RESULT_FILE_NAME); +} + +/* + * cat_test - Execute CAT benchmark and measure cache misses + * @test: Test information structure + * @uparams: User supplied parameters + * @param: Parameters passed to cat_test() + * @span: Buffer size for the benchmark + * @current_mask Start mask for the first iteration + * + * Run CAT selftest by varying the allocated cache portion and comparing the + * impact on cache misses (the result analysis is done in check_results() + * and show_results_info(), not in this function). + * + * One bit is removed from the CAT allocation bit mask (in current_mask) for + * each subsequent test which keeps reducing the size of the allocated cache + * portion. A single test flushes the buffer, reads it to warm up the cache, + * and reads the buffer again. The cache misses are measured during the last + * read pass. + * + * Return: 0 when the test was run, < 0 on error. 
+ */ +static int cat_test(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *param, + size_t span, unsigned long current_mask) +{ + struct perf_event_read pe_read; + struct perf_event_attr pea; + cpu_set_t old_affinity; + unsigned char *buf; + char schemata[64]; + int ret, i, pe_fd; + pid_t bm_pid; + + if (strcmp(param->filename, "") == 0) + sprintf(param->filename, "stdio"); + + bm_pid = getpid(); + + /* Taskset benchmark to specified cpu */ + ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity); + if (ret) + return ret; + + /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); + if (ret) + goto reset_affinity; + + perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES); + perf_event_initialize_read_format(&pe_read); + pe_fd = perf_open(&pea, bm_pid, uparams->cpu); + if (pe_fd < 0) { + ret = -1; + goto reset_affinity; + } + + buf = alloc_buffer(span, 1); + if (!buf) { + ret = -1; + goto pe_close; + } + + while (current_mask) { + snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask); + ret = write_schemata("", schemata, uparams->cpu, test->resource); + if (ret) + goto free_buf; + snprintf(schemata, sizeof(schemata), "%lx", current_mask); + ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource); + if (ret) + goto free_buf; + + for (i = 0; i < NUM_OF_RUNS; i++) { + mem_flush(buf, span); + fill_cache_read(buf, span, true); + + ret = perf_event_reset_enable(pe_fd); + if (ret) + goto free_buf; + + fill_cache_read(buf, span, true); + + ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid); + if (ret) + goto free_buf; + } + current_mask = next_mask(current_mask); + } + +free_buf: + free(buf); +pe_close: + close(pe_fd); +reset_affinity: + taskset_restore(bm_pid, &old_affinity); + + return ret; +} + +static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) +{ + unsigned long long_mask, start_mask, full_cache_mask; + unsigned long cache_total_size = 0; + int n = uparams->bits; + unsigned int start; + int count_of_bits; + size_t span; + int ret; + + ret = get_full_cbm(test->resource, &full_cache_mask); + if (ret) + return ret; + /* Get the largest contiguous exclusive portion of the cache */ + ret = get_mask_no_shareable(test->resource, &long_mask); + if (ret) + return ret; + + /* Get L3/L2 cache size */ + ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size); + if (ret) + return ret; + ksft_print_msg("Cache size :%lu\n", cache_total_size); + + count_of_bits = count_contiguous_bits(long_mask, &start); + + if (!n) + n = count_of_bits / 2; + + if (n > count_of_bits - 1) { + ksft_print_msg("Invalid input value for no_of_bits n!\n"); + ksft_print_msg("Please enter value in range 1 to %d\n", + count_of_bits - 1); + return -1; + } + start_mask = create_bit_mask(start, n); + + struct resctrl_val_param param = { + .ctrlgrp = "c1", + .filename = RESULT_FILE_NAME, + .num_of_runs = 0, + }; + param.mask = long_mask; + span = cache_portion_size(cache_total_size, start_mask, full_cache_mask); + + remove(param.filename); + + ret = cat_test(test, uparams, ¶m, span, start_mask); + if (ret) + return ret; + + ret = check_results(¶m, test->resource, + cache_total_size, full_cache_mask, start_mask); + return ret; +} + +static bool arch_supports_noncont_cat(const struct resctrl_test *test) +{ + /* AMD always supports non-contiguous CBM. 
*/ + if (get_vendor() == ARCH_AMD) + return true; + +#if defined(__i386__) || defined(__x86_64__) /* arch */ + unsigned int eax, ebx, ecx, edx; + /* Intel support for non-contiguous CBM needs to be discovered. */ + if (!strcmp(test->resource, "L3")) + __cpuid_count(0x10, 1, eax, ebx, ecx, edx); + else if (!strcmp(test->resource, "L2")) + __cpuid_count(0x10, 2, eax, ebx, ecx, edx); + else + return false; + + return ((ecx >> 3) & 1); +#endif /* end arch */ + + return false; +} + +static int noncont_cat_run_test(const struct resctrl_test *test, + const struct user_params *uparams) +{ + unsigned long full_cache_mask, cont_mask, noncont_mask; + unsigned int sparse_masks; + int bit_center, ret; + char schemata[64]; + + /* Check to compare sparse_masks content to CPUID output. */ + ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks); + if (ret) + return ret; + + if (arch_supports_noncont_cat(test) != sparse_masks) { + ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n"); + return 1; + } + + /* Write checks initialization. */ + ret = get_full_cbm(test->resource, &full_cache_mask); + if (ret < 0) + return ret; + bit_center = count_bits(full_cache_mask) / 2; + + /* + * The bit_center needs to be at least 3 to properly calculate the CBM + * hole in the noncont_mask. If it's smaller return an error since the + * cache mask is too short and that shouldn't happen. + */ + if (bit_center < 3) + return -EINVAL; + cont_mask = full_cache_mask >> bit_center; + + /* Contiguous mask write check. */ + snprintf(schemata, sizeof(schemata), "%lx", cont_mask); + ret = write_schemata("", schemata, uparams->cpu, test->resource); + if (ret) { + ksft_print_msg("Write of contiguous CBM failed\n"); + return 1; + } + + /* + * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle. + * Output is compared with support information to catch any edge case errors. 
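A worked example of the two mask computations used next (illustrative only, assuming a hypothetical 12-bit full CBM): bit_center is 6, so the contiguous mask is the low half and the non-contiguous mask punches a 4-bit hole out of the middle.

        #include <stdio.h>

        int main(void)
        {
                unsigned long full_cache_mask = 0xfff;  /* 12-bit CBM */
                int bit_center = 6;     /* count_bits(0xfff) / 2 */
                unsigned long cont_mask = full_cache_mask >> bit_center;
                unsigned long noncont_mask =
                        ~(0xfUL << (bit_center - 2)) & full_cache_mask;

                printf("contiguous:     %lx\n", cont_mask);     /* 3f  */
                printf("non-contiguous: %lx\n", noncont_mask);  /* f0f */
                return 0;
        }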
+ */ + noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask; + snprintf(schemata, sizeof(schemata), "%lx", noncont_mask); + ret = write_schemata("", schemata, uparams->cpu, test->resource); + if (ret && sparse_masks) + ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n"); + else if (ret && !sparse_masks) + ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n"); + else if (!ret && !sparse_masks) + ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n"); + + return !ret == !sparse_masks; +} + +static bool noncont_cat_feature_check(const struct resctrl_test *test) +{ + if (!resctrl_resource_exists(test->resource)) + return false; + + return resource_info_file_exists(test->resource, "sparse_masks"); +} + +struct resctrl_test l3_cat_test = { + .name = "L3_CAT", + .group = "CAT", + .resource = "L3", + .feature_check = test_resource_feature_check, + .run_test = cat_run_test, + .cleanup = cat_test_cleanup, +}; + +struct resctrl_test l3_noncont_cat_test = { + .name = "L3_NONCONT_CAT", + .group = "CAT", + .resource = "L3", + .feature_check = noncont_cat_feature_check, + .run_test = noncont_cat_run_test, +}; + +struct resctrl_test l2_noncont_cat_test = { + .name = "L2_NONCONT_CAT", + .group = "CAT", + .resource = "L2", + .feature_check = noncont_cat_feature_check, + .run_test = noncont_cat_run_test, +}; diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c new file mode 100644 index 000000000000..d09e693dc739 --- /dev/null +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cache Monitoring Technology (CMT) test + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ +#include "resctrl.h" +#include <unistd.h> + +#define RESULT_FILE_NAME "result_cmt" +#define NUM_OF_RUNS 5 +#define MAX_DIFF 2000000 +#define MAX_DIFF_PERCENT 15 + +#define CON_MON_LCC_OCCUP_PATH \ + "%s/%s/mon_data/mon_L3_%02d/llc_occupancy" + +static int cmt_init(const struct resctrl_val_param *param, int domain_id) +{ + sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, + param->ctrlgrp, domain_id); + + return 0; +} + +static int cmt_setup(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *p) +{ + /* Run NUM_OF_RUNS times */ + if (p->num_of_runs >= NUM_OF_RUNS) + return END_OF_TESTS; + + p->num_of_runs++; + + return 0; +} + +static int cmt_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + sleep(1); + return measure_llc_resctrl(param->filename, bm_pid); +} + +static int show_results_info(unsigned long sum_llc_val, int no_of_bits, + unsigned long cache_span, unsigned long max_diff, + unsigned long max_diff_percent, unsigned long num_of_runs, + bool platform) +{ + unsigned long avg_llc_val = 0; + float diff_percent; + long avg_diff = 0; + int ret; + + avg_llc_val = sum_llc_val / num_of_runs; + avg_diff = (long)(cache_span - avg_llc_val); + diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100; + + ret = platform && abs((int)diff_percent) > max_diff_percent && + labs(avg_diff) > max_diff; + + ksft_print_msg("%s Check cache miss rate within %lu%%\n", + ret ? 
"Fail:" : "Pass:", max_diff_percent); + + ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent)); + + show_cache_info(no_of_bits, avg_llc_val, cache_span, false); + + return ret; +} + +static int check_results(struct resctrl_val_param *param, size_t span, int no_of_bits) +{ + char *token_array[8], temp[512]; + unsigned long sum_llc_occu_resc = 0; + int runs = 0; + FILE *fp; + + ksft_print_msg("Checking for pass/fail\n"); + fp = fopen(param->filename, "r"); + if (!fp) { + ksft_perror("Error in opening file"); + + return -1; + } + + while (fgets(temp, sizeof(temp), fp)) { + char *token = strtok(temp, ":\t"); + int fields = 0; + + while (token) { + token_array[fields++] = token; + token = strtok(NULL, ":\t"); + } + + /* Field 3 is llc occ resc value */ + sum_llc_occu_resc += strtoul(token_array[3], NULL, 0); + runs++; + } + fclose(fp); + + return show_results_info(sum_llc_occu_resc, no_of_bits, span, + MAX_DIFF, MAX_DIFF_PERCENT, runs, true); +} + +static void cmt_test_cleanup(void) +{ + remove(RESULT_FILE_NAME); +} + +static int cmt_run_test(const struct resctrl_test *test, const struct user_params *uparams) +{ + struct fill_buf_param fill_buf = {}; + unsigned long cache_total_size = 0; + int n = uparams->bits ? : 5; + unsigned long long_mask; + int count_of_bits; + size_t span; + int ret; + + ret = get_full_cbm("L3", &long_mask); + if (ret) + return ret; + + ret = get_cache_size(uparams->cpu, "L3", &cache_total_size); + if (ret) + return ret; + ksft_print_msg("Cache size :%lu\n", cache_total_size); + + count_of_bits = count_bits(long_mask); + + if (n < 1 || n > count_of_bits) { + ksft_print_msg("Invalid input value for numbr_of_bits n!\n"); + ksft_print_msg("Please enter value in range 1 to %d\n", count_of_bits); + return -1; + } + + struct resctrl_val_param param = { + .ctrlgrp = "c1", + .filename = RESULT_FILE_NAME, + .mask = ~(long_mask << n) & long_mask, + .num_of_runs = 0, + .init = cmt_init, + .setup = cmt_setup, + .measure = cmt_measure, + }; + + span = cache_portion_size(cache_total_size, param.mask, long_mask); + + if (uparams->fill_buf) { + fill_buf.buf_size = span; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + fill_buf.buf_size = span; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } + + remove(RESULT_FILE_NAME); + + ret = resctrl_val(test, uparams, ¶m); + if (ret) + return ret; + + ret = check_results(¶m, span, n); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); + + return ret; +} + +static bool cmt_feature_check(const struct resctrl_test *test) +{ + return test_resource_feature_check(test) && + resctrl_mon_feature_exists("L3_MON", "llc_occupancy"); +} + +struct resctrl_test cmt_test = { + .name = "CMT", + .resource = "L3", + .feature_check = cmt_feature_check, + .run_test = cmt_run_test, + .cleanup = cmt_test_cleanup, +}; diff --git a/tools/testing/selftests/resctrl/config b/tools/testing/selftests/resctrl/config new file mode 100644 index 000000000000..8d9f2deb56ed --- /dev/null +++ b/tools/testing/selftests/resctrl/config @@ -0,0 +1,2 @@ +CONFIG_X86_CPU_RESCTRL=y +CONFIG_PROC_CPU_RESCTRL=y diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c new file mode 100644 index 000000000000..19a01a52dc1a --- /dev/null +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + 
* fill_buf benchmark + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <inttypes.h> +#include <string.h> + +#include "resctrl.h" + +#define CL_SIZE (64) +#define PAGE_SIZE (4 * 1024) +#define MB (1024 * 1024) + +static void sb(void) +{ +#if defined(__i386) || defined(__x86_64) + asm volatile("sfence\n\t" + : : : "memory"); +#endif +} + +static void cl_flush(void *p) +{ +#if defined(__i386) || defined(__x86_64) + asm volatile("clflush (%0)\n\t" + : : "r"(p) : "memory"); +#endif +} + +void mem_flush(unsigned char *buf, size_t buf_size) +{ + unsigned char *cp = buf; + size_t i = 0; + + buf_size = buf_size / CL_SIZE; /* mem size in cache lines */ + + for (i = 0; i < buf_size; i++) + cl_flush(&cp[i * CL_SIZE]); + + sb(); +} + +/* + * Buffer index step advance to workaround HW prefetching interfering with + * the measurements. + * + * Must be a prime to step through all indexes of the buffer. + * + * Some primes work better than others on some architectures (from MBA/MBM + * result stability point of view). + */ +#define FILL_IDX_MULT 23 + +static int fill_one_span_read(unsigned char *buf, size_t buf_size) +{ + unsigned int size = buf_size / (CL_SIZE / 2); + unsigned int i, idx = 0; + unsigned char sum = 0; + + /* + * Read the buffer in an order that is unexpected by HW prefetching + * optimizations to prevent them interfering with the caching pattern. + * + * The read order is (in terms of halves of cachelines): + * i * FILL_IDX_MULT % size + * The formula is open-coded below to avoiding modulo inside the loop + * as it improves MBA/MBM result stability on some architectures. + */ + for (i = 0; i < size; i++) { + sum += buf[idx * (CL_SIZE / 2)]; + + idx += FILL_IDX_MULT; + while (idx >= size) + idx -= size; + } + + return sum; +} + +void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) +{ + int ret = 0; + + while (1) { + ret = fill_one_span_read(buf, buf_size); + if (once) + break; + } + + /* Consume read result so that reading memory is not optimized out. */ + *value_sink = ret; +} + +unsigned char *alloc_buffer(size_t buf_size, bool memflush) +{ + void *buf = NULL; + uint64_t *p64; + ssize_t s64; + int ret; + + ret = posix_memalign(&buf, PAGE_SIZE, buf_size); + if (ret < 0) + return NULL; + + /* Initialize the buffer */ + p64 = buf; + s64 = buf_size / sizeof(uint64_t); + + while (s64 > 0) { + *p64 = (uint64_t)rand(); + p64 += (CL_SIZE / sizeof(uint64_t)); + s64 -= (CL_SIZE / sizeof(uint64_t)); + } + + /* Flush the memory before using to avoid "cache hot pages" effect */ + if (memflush) + mem_flush(buf, buf_size); + + return buf; +} + +ssize_t get_fill_buf_size(int cpu_no, const char *cache_type) +{ + unsigned long cache_total_size = 0; + int ret; + + ret = get_cache_size(cpu_no, cache_type, &cache_total_size); + if (ret) + return ret; + + return cache_total_size * 2 > MINIMUM_SPAN ? 
+ cache_total_size * 2 : MINIMUM_SPAN; +} diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c new file mode 100644 index 000000000000..c7e9adc0368f --- /dev/null +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Memory Bandwidth Allocation (MBA) test + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ +#include "resctrl.h" + +#define RESULT_FILE_NAME "result_mba" +#define NUM_OF_RUNS 5 +#define MAX_DIFF_PERCENT 8 +#define ALLOCATION_MAX 100 +#define ALLOCATION_MIN 10 +#define ALLOCATION_STEP 10 + +static int mba_init(const struct resctrl_val_param *param, int domain_id) +{ + int ret; + + ret = initialize_read_mem_bw_imc(); + if (ret) + return ret; + + initialize_mem_bw_resctrl(param, domain_id); + + return 0; +} + +/* + * Change schemata percentage from 100 to 10%. Write schemata to specified + * con_mon grp, mon_grp in resctrl FS. + * For each allocation, run 5 times in order to get average values. + */ +static int mba_setup(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *p) +{ + static unsigned int allocation = ALLOCATION_MIN; + static int runs_per_allocation; + char allocation_str[64]; + int ret; + + if (runs_per_allocation >= NUM_OF_RUNS) + runs_per_allocation = 0; + + /* Only set up schemata once every NUM_OF_RUNS of allocations */ + if (runs_per_allocation++ != 0) + return 0; + + if (allocation > ALLOCATION_MAX) + return END_OF_TESTS; + + sprintf(allocation_str, "%d", allocation); + + ret = write_schemata(p->ctrlgrp, allocation_str, uparams->cpu, test->resource); + if (ret < 0) + return ret; + + allocation += ALLOCATION_STEP; + + return 0; +} + +static int mba_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + return measure_read_mem_bw(uparams, param, bm_pid); +} + +static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) +{ + unsigned int allocation; + bool ret = false; + int runs; + + ksft_print_msg("Results are displayed in (MB)\n"); + /* Memory bandwidth from 100% down to 10% */ + for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP; + allocation++) { + unsigned long sum_bw_imc = 0, sum_bw_resc = 0; + long avg_bw_imc, avg_bw_resc; + int avg_diff_per; + float avg_diff; + + for (runs = NUM_OF_RUNS * allocation; + runs < NUM_OF_RUNS * allocation + NUM_OF_RUNS ; runs++) { + sum_bw_imc += bw_imc[runs]; + sum_bw_resc += bw_resc[runs]; + } + + avg_bw_imc = sum_bw_imc / NUM_OF_RUNS; + avg_bw_resc = sum_bw_resc / NUM_OF_RUNS; + if (avg_bw_imc < THROTTLE_THRESHOLD || avg_bw_resc < THROTTLE_THRESHOLD) { + ksft_print_msg("Bandwidth below threshold (%d MiB). Dropping results from MBA schemata %u.\n", + THROTTLE_THRESHOLD, + ALLOCATION_MIN + ALLOCATION_STEP * allocation); + continue; + } + + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; + avg_diff_per = (int)(avg_diff * 100); + + ksft_print_msg("%s Check MBA diff within %d%% for schemata %u\n", + avg_diff_per > MAX_DIFF_PERCENT ? 
+ "Fail:" : "Pass:", + MAX_DIFF_PERCENT, + ALLOCATION_MIN + ALLOCATION_STEP * allocation); + + ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); + ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); + ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); + if (avg_diff_per > MAX_DIFF_PERCENT) + ret = true; + } + + ksft_print_msg("%s Check schemata change using MBA\n", + ret ? "Fail:" : "Pass:"); + if (ret) + ksft_print_msg("At least one test failed\n"); + + return ret; +} + +static int check_results(void) +{ + unsigned long bw_resc[NUM_OF_RUNS * ALLOCATION_MAX / ALLOCATION_STEP]; + unsigned long bw_imc[NUM_OF_RUNS * ALLOCATION_MAX / ALLOCATION_STEP]; + char *token_array[8], output[] = RESULT_FILE_NAME, temp[512]; + int runs; + FILE *fp; + + fp = fopen(output, "r"); + if (!fp) { + ksft_perror(output); + + return -1; + } + + runs = 0; + while (fgets(temp, sizeof(temp), fp)) { + char *token = strtok(temp, ":\t"); + int fields = 0; + + while (token) { + token_array[fields++] = token; + token = strtok(NULL, ":\t"); + } + + /* Field 3 is perf imc value */ + bw_imc[runs] = strtoul(token_array[3], NULL, 0); + /* Field 5 is resctrl value */ + bw_resc[runs] = strtoul(token_array[5], NULL, 0); + runs++; + } + + fclose(fp); + + return show_mba_info(bw_imc, bw_resc); +} + +static void mba_test_cleanup(void) +{ + remove(RESULT_FILE_NAME); +} + +static int mba_run_test(const struct resctrl_test *test, const struct user_params *uparams) +{ + struct resctrl_val_param param = { + .ctrlgrp = "c1", + .filename = RESULT_FILE_NAME, + .init = mba_init, + .setup = mba_setup, + .measure = mba_measure, + }; + struct fill_buf_param fill_buf = {}; + int ret; + + remove(RESULT_FILE_NAME); + + if (uparams->fill_buf) { + fill_buf.buf_size = uparams->fill_buf->buf_size; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + ssize_t buf_size; + + buf_size = get_fill_buf_size(uparams->cpu, "L3"); + if (buf_size < 0) + return buf_size; + fill_buf.buf_size = buf_size; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } + + ret = resctrl_val(test, uparams, ¶m); + if (ret) + return ret; + + ret = check_results(); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); + + return ret; +} + +static bool mba_feature_check(const struct resctrl_test *test) +{ + return test_resource_feature_check(test) && + resctrl_mon_feature_exists("L3_MON", "mbm_local_bytes"); +} + +struct resctrl_test mba_test = { + .name = "MBA", + .resource = "MB", + .vendor_specific = ARCH_INTEL, + .feature_check = mba_feature_check, + .run_test = mba_run_test, + .cleanup = mba_test_cleanup, +}; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c new file mode 100644 index 000000000000..84d8bc250539 --- /dev/null +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Memory Bandwidth Monitoring (MBM) test + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ +#include "resctrl.h" + +#define RESULT_FILE_NAME "result_mbm" +#define MAX_DIFF_PERCENT 8 +#define NUM_OF_RUNS 5 + +static int +show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) +{ + unsigned long sum_bw_imc = 0, sum_bw_resc = 0; + long avg_bw_imc = 0, avg_bw_resc = 0; + int runs, 
ret, avg_diff_per; + float avg_diff = 0; + + for (runs = 0; runs < NUM_OF_RUNS; runs++) { + sum_bw_imc += bw_imc[runs]; + sum_bw_resc += bw_resc[runs]; + } + + avg_bw_imc = sum_bw_imc / NUM_OF_RUNS; + avg_bw_resc = sum_bw_resc / NUM_OF_RUNS; + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; + avg_diff_per = (int)(avg_diff * 100); + + ret = avg_diff_per > MAX_DIFF_PERCENT; + ksft_print_msg("%s Check MBM diff within %d%%\n", + ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT); + ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); + if (span) + ksft_print_msg("Span (MB): %zu\n", span / MB); + ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); + ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); + + return ret; +} + +static int check_results(size_t span) +{ + unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS]; + char temp[1024], *token_array[8]; + char output[] = RESULT_FILE_NAME; + int runs, ret; + FILE *fp; + + ksft_print_msg("Checking for pass/fail\n"); + + fp = fopen(output, "r"); + if (!fp) { + ksft_perror(output); + + return -1; + } + + runs = 0; + while (fgets(temp, sizeof(temp), fp)) { + char *token = strtok(temp, ":\t"); + int i = 0; + + while (token) { + token_array[i++] = token; + token = strtok(NULL, ":\t"); + } + + bw_resc[runs] = strtoul(token_array[5], NULL, 0); + bw_imc[runs] = strtoul(token_array[3], NULL, 0); + runs++; + } + + ret = show_bw_info(bw_imc, bw_resc, span); + + fclose(fp); + + return ret; +} + +static int mbm_init(const struct resctrl_val_param *param, int domain_id) +{ + int ret; + + ret = initialize_read_mem_bw_imc(); + if (ret) + return ret; + + initialize_mem_bw_resctrl(param, domain_id); + + return 0; +} + +static int mbm_setup(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *p) +{ + int ret = 0; + + /* Run NUM_OF_RUNS times */ + if (p->num_of_runs >= NUM_OF_RUNS) + return END_OF_TESTS; + + /* Set up shemata with 100% allocation on the first run. */ + if (p->num_of_runs == 0 && resctrl_resource_exists("MB")) + ret = write_schemata(p->ctrlgrp, "100", uparams->cpu, test->resource); + + p->num_of_runs++; + + return ret; +} + +static int mbm_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + return measure_read_mem_bw(uparams, param, bm_pid); +} + +static void mbm_test_cleanup(void) +{ + remove(RESULT_FILE_NAME); +} + +static int mbm_run_test(const struct resctrl_test *test, const struct user_params *uparams) +{ + struct resctrl_val_param param = { + .ctrlgrp = "c1", + .filename = RESULT_FILE_NAME, + .init = mbm_init, + .setup = mbm_setup, + .measure = mbm_measure, + }; + struct fill_buf_param fill_buf = {}; + int ret; + + remove(RESULT_FILE_NAME); + + if (uparams->fill_buf) { + fill_buf.buf_size = uparams->fill_buf->buf_size; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + ssize_t buf_size; + + buf_size = get_fill_buf_size(uparams->cpu, "L3"); + if (buf_size < 0) + return buf_size; + fill_buf.buf_size = buf_size; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } + + ret = resctrl_val(test, uparams, ¶m); + if (ret) + return ret; + + ret = check_results(param.fill_buf ? 
param.fill_buf->buf_size : 0); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); + + return ret; +} + +static bool mbm_feature_check(const struct resctrl_test *test) +{ + return resctrl_mon_feature_exists("L3_MON", "mbm_total_bytes") && + resctrl_mon_feature_exists("L3_MON", "mbm_local_bytes"); +} + +struct resctrl_test mbm_test = { + .name = "MBM", + .resource = "MB", + .vendor_specific = ARCH_INTEL, + .feature_check = mbm_feature_check, + .run_test = mbm_run_test, + .cleanup = mbm_test_cleanup, +}; diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h new file mode 100644 index 000000000000..3c51bdac2dfa --- /dev/null +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -0,0 +1,248 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef RESCTRL_H +#define RESCTRL_H +#include <stdio.h> +#include <math.h> +#include <errno.h> +#include <sched.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <signal.h> +#include <dirent.h> +#include <stdbool.h> +#include <ctype.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/select.h> +#include <sys/time.h> +#include <sys/eventfd.h> +#include <asm/unistd.h> +#include <linux/perf_event.h> +#include <linux/compiler.h> +#include "kselftest.h" + +#define MB (1024 * 1024) +#define RESCTRL_PATH "/sys/fs/resctrl" +#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" +#define INFO_PATH "/sys/fs/resctrl/info" + +/* + * CPU vendor IDs + * + * Define as bits because they're used for vendor_specific bitmask in + * the struct resctrl_test. + */ +#define ARCH_INTEL 1 +#define ARCH_AMD 2 + +#define END_OF_TESTS 1 + +#define BENCHMARK_ARGS 64 + +#define MINIMUM_SPAN (250 * MB) + +/* + * Memory bandwidth (in MiB) below which the bandwidth comparisons + * between iMC and resctrl are considered unreliable. For example RAS + * features or memory performance features that generate memory traffic + * may drive accesses that are counted differently by performance counters + * and MBM respectively, for instance generating "overhead" traffic which + * is not counted against any specific RMID. + */ +#define THROTTLE_THRESHOLD 750 + +/* + * fill_buf_param: "fill_buf" benchmark parameters + * @buf_size: Size (in bytes) of buffer used in benchmark. + * "fill_buf" allocates and initializes buffer of + * @buf_size. User can change value via command line. + * @memflush: If false the buffer will not be flushed after + * allocation and initialization, otherwise the + * buffer will be flushed. User can change value via + * command line (via integers with 0 interpreted as + * false and anything else as true). + */ +struct fill_buf_param { + size_t buf_size; + bool memflush; +}; + +/* + * user_params: User supplied parameters + * @cpu: CPU number to which the benchmark will be bound to + * @bits: Number of bits used for cache allocation size + * @benchmark_cmd: Benchmark command to run during (some of the) tests + * @fill_buf: Pointer to user provided parameters for "fill_buf", + * NULL if user did not provide parameters and test + * specific defaults should be used. 
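As an illustration of how these two structures fit together (a standalone sketch with local copies of the struct definitions; in the suite they come from resctrl.h and are filled in from the command line): a hypothetical "-b fill_buf 268435456 0" would yield roughly the following, with cpu defaulting to 1.

        #include <stdbool.h>
        #include <stddef.h>
        #include <stdio.h>

        struct fill_buf_param { size_t buf_size; bool memflush; };
        struct user_params {
                int cpu;
                int bits;
                const struct fill_buf_param *fill_buf;
        };

        int main(void)
        {
                struct fill_buf_param fill = {
                        .buf_size = 268435456,  /* 256 MiB buffer */
                        .memflush = false,      /* "0": skip the flush */
                };
                struct user_params up = { .cpu = 1, .fill_buf = &fill };

                printf("buf %zu bytes, memflush %d, cpu %d\n",
                       up.fill_buf->buf_size, up.fill_buf->memflush, up.cpu);
                return 0;
        }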
+ */ +struct user_params { + int cpu; + int bits; + const char *benchmark_cmd[BENCHMARK_ARGS]; + const struct fill_buf_param *fill_buf; +}; + +/* + * resctrl_test: resctrl test definition + * @name: Test name + * @group: Test group - a common name for tests that share some characteristic + * (e.g., L3 CAT test belongs to the CAT group). Can be NULL + * @resource: Resource to test (e.g., MB, L3, L2, etc.) + * @vendor_specific: Bitmask for vendor-specific tests (can be 0 for universal tests) + * @disabled: Test is disabled + * @feature_check: Callback to check required resctrl features + * @run_test: Callback to run the test + * @cleanup: Callback to cleanup after the test + */ +struct resctrl_test { + const char *name; + const char *group; + const char *resource; + unsigned int vendor_specific; + bool disabled; + bool (*feature_check)(const struct resctrl_test *test); + int (*run_test)(const struct resctrl_test *test, + const struct user_params *uparams); + void (*cleanup)(void); +}; + +/* + * resctrl_val_param: resctrl test parameters + * @ctrlgrp: Name of the control monitor group (con_mon grp) + * @mongrp: Name of the monitor group (mon grp) + * @filename: Name of file to which the o/p should be written + * @init: Callback function to initialize test environment + * @setup: Callback function to setup per test run environment + * @measure: Callback that performs the measurement (a single test) + * @fill_buf: Parameters for default "fill_buf" benchmark. + * Initialized with user provided parameters, possibly + * adapted to be relevant to the test. If user does + * not provide parameters for "fill_buf" nor a + * replacement benchmark then initialized with defaults + * appropriate for test. NULL if user provided + * benchmark. + */ +struct resctrl_val_param { + const char *ctrlgrp; + const char *mongrp; + char filename[64]; + unsigned long mask; + int num_of_runs; + int (*init)(const struct resctrl_val_param *param, + int domain_id); + int (*setup)(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *param); + int (*measure)(const struct user_params *uparams, + struct resctrl_val_param *param, + pid_t bm_pid); + struct fill_buf_param *fill_buf; +}; + +struct perf_event_read { + __u64 nr; /* The number of events */ + struct { + __u64 value; /* The value of the event */ + } values[2]; +}; + +/* + * Memory location that consumes values compiler must not optimize away. + * Volatile ensures writes to this location cannot be optimized away by + * compiler. 
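A minimal standalone demonstration of this sink pattern (not part of the patch): without the volatile store, an optimizing compiler may delete the buffer reads entirely, since their result would otherwise be unused.

        #include <stdio.h>

        static volatile int sink_target;
        volatile int *value_sink = &sink_target;

        int main(void)
        {
                static unsigned char buf[4096]; /* stand-in benchmark buffer */
                int i, sum = 0;

                for (i = 0; i < (int)sizeof(buf); i++)
                        sum += buf[i];

                /* Volatile store keeps the reads above alive at -O2. */
                *value_sink = sum;
                printf("sink holds %d\n", sink_target);
                return 0;
        }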
+ */ +extern volatile int *value_sink; + +extern int snc_unreliable; + +extern char llc_occup_path[1024]; + +int snc_nodes_per_l3_cache(void); +int get_vendor(void); +bool check_resctrlfs_support(void); +int filter_dmesg(void); +int get_domain_id(const char *resource, int cpu_no, int *domain_id); +int mount_resctrlfs(void); +int umount_resctrlfs(void); +bool resctrl_resource_exists(const char *resource); +bool resctrl_mon_feature_exists(const char *resource, const char *feature); +bool resource_info_file_exists(const char *resource, const char *file); +bool test_resource_feature_check(const struct resctrl_test *test); +char *fgrep(FILE *inf, const char *str); +int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity); +int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity); +int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, + const char *resource); +int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp); +int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, + int group_fd, unsigned long flags); +unsigned char *alloc_buffer(size_t buf_size, bool memflush); +void mem_flush(unsigned char *buf, size_t buf_size); +void fill_cache_read(unsigned char *buf, size_t buf_size, bool once); +ssize_t get_fill_buf_size(int cpu_no, const char *cache_type); +int initialize_read_mem_bw_imc(void); +int measure_read_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid); +void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, + int domain_id); +int resctrl_val(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *param); +unsigned long create_bit_mask(unsigned int start, unsigned int len); +unsigned int count_contiguous_bits(unsigned long val, unsigned int *start); +int get_full_cbm(const char *cache_type, unsigned long *mask); +int get_mask_no_shareable(const char *cache_type, unsigned long *mask); +int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size); +int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val); +void ctrlc_handler(int signum, siginfo_t *info, void *ptr); +int signal_handler_register(const struct resctrl_test *test); +void signal_handler_unregister(void); +unsigned int count_bits(unsigned long n); +int snc_kernel_support(void); + +void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config); +void perf_event_initialize_read_format(struct perf_event_read *pe_read); +int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no); +int perf_event_reset_enable(int pe_fd); +int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, + const char *filename, pid_t bm_pid); +int measure_llc_resctrl(const char *filename, pid_t bm_pid); +void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines); + +/* + * cache_portion_size - Calculate the size of a cache portion + * @cache_size: Total cache size in bytes + * @portion_mask: Cache portion mask + * @full_cache_mask: Full Cache Bit Mask (CBM) for the cache + * + * Return: The size of the cache portion in bytes. + */ +static inline unsigned long cache_portion_size(unsigned long cache_size, + unsigned long portion_mask, + unsigned long full_cache_mask) +{ + unsigned int bits = count_bits(full_cache_mask); + + /* + * With no bits the full CBM, assume cache cannot be split into + * smaller portions. To avoid divide by zero, return cache_size. 
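A worked example of the calculation (a sketch only; count_bits() here mirrors the helper declared above): a 16 MiB cache with a 12-bit full CBM and a 4-bit portion mask gives 16 MiB * 4 / 12 = 5592405 bytes, about a third of the cache.

        #include <stdio.h>

        static unsigned int count_bits(unsigned long n)
        {
                unsigned int c = 0;

                while (n) {
                        c += n & 1;
                        n >>= 1;
                }
                return c;
        }

        static unsigned long cache_portion_size(unsigned long cache_size,
                                                unsigned long portion_mask,
                                                unsigned long full_cache_mask)
        {
                unsigned int bits = count_bits(full_cache_mask);

                if (!bits)
                        return cache_size;
                return cache_size * count_bits(portion_mask) / bits;
        }

        int main(void)
        {
                /* 16 MiB cache, 4 of 12 CBM bits -> 5592405 bytes */
                printf("%lu\n",
                       cache_portion_size(16 * 1024 * 1024UL, 0xf, 0xfff));
                return 0;
        }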
+ */ + if (!bits) + return cache_size; + + return cache_size * count_bits(portion_mask) / bits; +} + +extern struct resctrl_test mbm_test; +extern struct resctrl_test mba_test; +extern struct resctrl_test cmt_test; +extern struct resctrl_test l3_cat_test; +extern struct resctrl_test l3_noncont_cat_test; +extern struct resctrl_test l2_noncont_cat_test; + +#endif /* RESCTRL_H */ diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c new file mode 100644 index 000000000000..5154ffd821c4 --- /dev/null +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Resctrl tests + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ +#include "resctrl.h" + +/* Volatile memory sink to prevent compiler optimizations */ +static volatile int sink_target; +volatile int *value_sink = &sink_target; + +static struct resctrl_test *resctrl_tests[] = { + &mbm_test, + &mba_test, + &cmt_test, + &l3_cat_test, + &l3_noncont_cat_test, + &l2_noncont_cat_test, +}; + +static int detect_vendor(void) +{ + FILE *inf = fopen("/proc/cpuinfo", "r"); + int vendor_id = 0; + char *s = NULL; + char *res; + + if (!inf) + return vendor_id; + + res = fgrep(inf, "vendor_id"); + + if (res) + s = strchr(res, ':'); + + if (s && !strcmp(s, ": GenuineIntel\n")) + vendor_id = ARCH_INTEL; + else if (s && !strcmp(s, ": AuthenticAMD\n")) + vendor_id = ARCH_AMD; + + fclose(inf); + free(res); + return vendor_id; +} + +int get_vendor(void) +{ + static int vendor = -1; + + if (vendor == -1) + vendor = detect_vendor(); + if (vendor == 0) + ksft_print_msg("Can not get vendor info...\n"); + + return vendor; +} + +static void cmd_help(void) +{ + int i; + + printf("usage: resctrl_tests [-h] [-t test list] [-n no_of_bits] [-b benchmark_cmd [option]...]\n"); + printf("\t-b benchmark_cmd [option]...: run specified benchmark for MBM, MBA and CMT\n"); + printf("\t default benchmark is builtin fill_buf\n"); + printf("\t-t test list: run tests/groups specified by the list, "); + printf("e.g. -t mbm,mba,cmt,cat\n"); + printf("\t\tSupported tests (group):\n"); + for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) { + if (resctrl_tests[i]->group) + printf("\t\t\t%s (%s)\n", resctrl_tests[i]->name, resctrl_tests[i]->group); + else + printf("\t\t\t%s\n", resctrl_tests[i]->name); + } + printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n"); + printf("\t-p cpu_no: specify CPU number to run the test. 
1 is default\n"); + printf("\t-h: help\n"); +} + +static int test_prepare(const struct resctrl_test *test) +{ + int res; + + res = signal_handler_register(test); + if (res) { + ksft_print_msg("Failed to register signal handler\n"); + return res; + } + + res = mount_resctrlfs(); + if (res) { + signal_handler_unregister(); + ksft_print_msg("Failed to mount resctrl FS\n"); + return res; + } + return 0; +} + +static void test_cleanup(const struct resctrl_test *test) +{ + if (test->cleanup) + test->cleanup(); + umount_resctrlfs(); + signal_handler_unregister(); +} + +static bool test_vendor_specific_check(const struct resctrl_test *test) +{ + if (!test->vendor_specific) + return true; + + return get_vendor() & test->vendor_specific; +} + +static void run_single_test(const struct resctrl_test *test, const struct user_params *uparams) +{ + int ret, snc_mode; + + if (test->disabled) + return; + + if (!test_vendor_specific_check(test)) { + ksft_test_result_skip("Hardware does not support %s\n", test->name); + return; + } + + snc_mode = snc_nodes_per_l3_cache(); + + ksft_print_msg("Starting %s test ...\n", test->name); + + if (snc_mode == 1 && snc_unreliable && get_vendor() == ARCH_INTEL) { + ksft_test_result_skip("SNC detection unreliable due to offline CPUs. Test results may not be accurate if SNC enabled.\n"); + return; + } + + if (test_prepare(test)) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); + return; + } + + if (!test->feature_check(test)) { + ksft_test_result_skip("Hardware does not support %s or %s is disabled\n", + test->name, test->name); + goto cleanup; + } + + ret = test->run_test(test, uparams); + ksft_test_result(!ret, "%s: test\n", test->name); + +cleanup: + test_cleanup(test); +} + +/* + * Allocate and initialize a struct fill_buf_param with user provided + * (via "-b fill_buf <fill_buf parameters>") parameters. + * + * Use defaults (that may not be appropriate for all tests) for any + * fill_buf parameters omitted by the user. + * + * Historically it may have been possible for user space to provide + * additional parameters, "operation" ("read" vs "write") in + * benchmark_cmd[3] and "once" (run "once" or until terminated) in + * benchmark_cmd[4]. Changing these parameters have never been + * supported with the default of "read" operation and running until + * terminated built into the tests. Any unsupported values for + * (original) "fill_buf" parameters are treated as failure. + * + * Return: On failure, forcibly exits the test on any parsing failure, + * returns NULL if no parsing needed (user did not actually provide + * "-b fill_buf"). + * On success, returns pointer to newly allocated and fully + * initialized struct fill_buf_param that caller must free. 
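The parsing below relies on the classic strtoul()/endptr/errno validation idiom; a reduced standalone sketch of that idiom (a hypothetical program, not part of the patch) shows how it distinguishes "not a number" and "trailing junk" from a good parse:

        #include <errno.h>
        #include <stdio.h>
        #include <stdlib.h>

        int main(int argc, char **argv)
        {
                unsigned long buf_size;
                char *endptr = NULL;

                if (argc < 2)
                        return 1;

                errno = 0;
                buf_size = strtoul(argv[1], &endptr, 10);
                if (errno || *endptr != '\0') {
                        fprintf(stderr, "cannot parse '%s'\n", argv[1]);
                        return 1;
                }
                printf("buffer size: %lu bytes\n", buf_size);
                return 0;
        }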
+ */ +static struct fill_buf_param *alloc_fill_buf_param(struct user_params *uparams) +{ + struct fill_buf_param *fill_param = NULL; + char *endptr = NULL; + + if (!uparams->benchmark_cmd[0] || strcmp(uparams->benchmark_cmd[0], "fill_buf")) + return NULL; + + fill_param = malloc(sizeof(*fill_param)); + if (!fill_param) + ksft_exit_skip("Unable to allocate memory for fill_buf parameters.\n"); + + if (uparams->benchmark_cmd[1] && *uparams->benchmark_cmd[1] != '\0') { + errno = 0; + fill_param->buf_size = strtoul(uparams->benchmark_cmd[1], &endptr, 10); + if (errno || *endptr != '\0') { + free(fill_param); + ksft_exit_skip("Unable to parse benchmark buffer size.\n"); + } + } else { + fill_param->buf_size = MINIMUM_SPAN; + } + + if (uparams->benchmark_cmd[2] && *uparams->benchmark_cmd[2] != '\0') { + errno = 0; + fill_param->memflush = strtol(uparams->benchmark_cmd[2], &endptr, 10) != 0; + if (errno || *endptr != '\0') { + free(fill_param); + ksft_exit_skip("Unable to parse benchmark memflush parameter.\n"); + } + } else { + fill_param->memflush = true; + } + + if (uparams->benchmark_cmd[3] && *uparams->benchmark_cmd[3] != '\0') { + if (strcmp(uparams->benchmark_cmd[3], "0")) { + free(fill_param); + ksft_exit_skip("Only read operations supported.\n"); + } + } + + if (uparams->benchmark_cmd[4] && *uparams->benchmark_cmd[4] != '\0') { + if (strcmp(uparams->benchmark_cmd[4], "false")) { + free(fill_param); + ksft_exit_skip("fill_buf is required to run until termination.\n"); + } + } + + return fill_param; +} + +static void init_user_params(struct user_params *uparams) +{ + memset(uparams, 0, sizeof(*uparams)); + + uparams->cpu = 1; + uparams->bits = 0; +} + +int main(int argc, char **argv) +{ + struct fill_buf_param *fill_param = NULL; + int tests = ARRAY_SIZE(resctrl_tests); + bool test_param_seen = false; + struct user_params uparams; + int c, i; + + init_user_params(&uparams); + + while ((c = getopt(argc, argv, "ht:b:n:p:")) != -1) { + char *token; + + switch (c) { + case 'b': + /* + * First move optind back to the (first) optarg and + * then build the benchmark command using the + * remaining arguments. + */ + optind--; + if (argc - optind >= BENCHMARK_ARGS) + ksft_exit_fail_msg("Too long benchmark command"); + + /* Extract benchmark command from command line. */ + for (i = 0; i < argc - optind; i++) + uparams.benchmark_cmd[i] = argv[i + optind]; + uparams.benchmark_cmd[i] = NULL; + + goto last_arg; + case 't': + token = strtok(optarg, ","); + + if (!test_param_seen) { + for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) + resctrl_tests[i]->disabled = true; + tests = 0; + test_param_seen = true; + } + while (token) { + bool found = false; + + for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) { + if (!strcasecmp(token, resctrl_tests[i]->name) || + (resctrl_tests[i]->group && + !strcasecmp(token, resctrl_tests[i]->group))) { + if (resctrl_tests[i]->disabled) + tests++; + resctrl_tests[i]->disabled = false; + found = true; + } + } + + if (!found) { + printf("invalid test: %s\n", token); + + return -1; + } + token = strtok(NULL, ","); + } + break; + case 'p': + uparams.cpu = atoi(optarg); + break; + case 'n': + uparams.bits = atoi(optarg); + if (uparams.bits <= 0) { + printf("Bail out! 
invalid argument for no_of_bits\n"); + return -1; + } + break; + case 'h': + cmd_help(); + + return 0; + default: + printf("invalid argument\n"); + + return -1; + } + } +last_arg: + + fill_param = alloc_fill_buf_param(&uparams); + if (fill_param) + uparams.fill_buf = fill_param; + + ksft_print_header(); + + /* + * Typically we need root privileges, because: + * 1. We write to resctrl FS + * 2. We execute perf commands + */ + if (geteuid() != 0) + ksft_exit_skip("Not running as root. Skipping...\n"); + + if (!check_resctrlfs_support()) + ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); + + if (umount_resctrlfs()) + ksft_exit_skip("resctrl FS unmount failed.\n"); + + filter_dmesg(); + + ksft_set_plan(tests); + + for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) + run_single_test(resctrl_tests[i], &uparams); + + free(fill_param); + ksft_finished(); +} diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c new file mode 100644 index 000000000000..7c08e936572d --- /dev/null +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Memory bandwidth monitoring and allocation library + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ +#include "resctrl.h" + +#define UNCORE_IMC "uncore_imc" +#define READ_FILE_NAME "events/cas_count_read" +#define DYN_PMU_PATH "/sys/bus/event_source/devices" +#define SCALE 0.00006103515625 +#define MAX_IMCS 20 +#define MAX_TOKENS 5 + +#define CON_MBM_LOCAL_BYTES_PATH \ + "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes" + +struct membw_read_format { + __u64 value; /* The value of the event */ + __u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ + __u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ + __u64 id; /* if PERF_FORMAT_ID */ +}; + +struct imc_counter_config { + __u32 type; + __u64 event; + __u64 umask; + struct perf_event_attr pe; + struct membw_read_format return_value; + int fd; +}; + +static char mbm_total_path[1024]; +static int imcs; +static struct imc_counter_config imc_counters_config[MAX_IMCS]; +static const struct resctrl_test *current_test; + +static void read_mem_bw_initialize_perf_event_attr(int i) +{ + memset(&imc_counters_config[i].pe, 0, + sizeof(struct perf_event_attr)); + imc_counters_config[i].pe.type = imc_counters_config[i].type; + imc_counters_config[i].pe.size = sizeof(struct perf_event_attr); + imc_counters_config[i].pe.disabled = 1; + imc_counters_config[i].pe.inherit = 1; + imc_counters_config[i].pe.exclude_guest = 0; + imc_counters_config[i].pe.config = + imc_counters_config[i].umask << 8 | + imc_counters_config[i].event; + imc_counters_config[i].pe.sample_type = PERF_SAMPLE_IDENTIFIER; + imc_counters_config[i].pe.read_format = + PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; +} + +static void read_mem_bw_ioctl_perf_event_ioc_reset_enable(int i) +{ + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_RESET, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_ENABLE, 0); +} + +static void read_mem_bw_ioctl_perf_event_ioc_disable(int i) +{ + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_DISABLE, 0); +} + +/* + * get_read_event_and_umask: Parse config into event and umask + * @cas_count_cfg: Config + * @count: iMC number + */ +static void get_read_event_and_umask(char *cas_count_cfg, int count) +{ + char *token[MAX_TOKENS]; + int i = 0; + + token[0] = 
strtok(cas_count_cfg, "=,"); + + for (i = 1; i < MAX_TOKENS; i++) + token[i] = strtok(NULL, "=,"); + + for (i = 0; i < MAX_TOKENS - 1; i++) { + if (!token[i]) + break; + if (strcmp(token[i], "event") == 0) + imc_counters_config[count].event = strtol(token[i + 1], NULL, 16); + if (strcmp(token[i], "umask") == 0) + imc_counters_config[count].umask = strtol(token[i + 1], NULL, 16); + } +} + +static int open_perf_read_event(int i, int cpu_no) +{ + imc_counters_config[i].fd = + perf_event_open(&imc_counters_config[i].pe, -1, cpu_no, -1, + PERF_FLAG_FD_CLOEXEC); + + if (imc_counters_config[i].fd == -1) { + fprintf(stderr, "Error opening leader %llx\n", + imc_counters_config[i].pe.config); + + return -1; + } + + return 0; +} + +/* Get type and config of an iMC counter's read event. */ +static int read_from_imc_dir(char *imc_dir, int count) +{ + char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024]; + FILE *fp; + + /* Get type of iMC counter */ + sprintf(imc_counter_type, "%s%s", imc_dir, "type"); + fp = fopen(imc_counter_type, "r"); + if (!fp) { + ksft_perror("Failed to open iMC counter type file"); + + return -1; + } + if (fscanf(fp, "%u", &imc_counters_config[count].type) <= 0) { + ksft_perror("Could not get iMC type"); + fclose(fp); + + return -1; + } + fclose(fp); + + /* Get read config */ + sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME); + fp = fopen(imc_counter_cfg, "r"); + if (!fp) { + ksft_perror("Failed to open iMC config file"); + + return -1; + } + if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) { + ksft_perror("Could not get iMC cas count read"); + fclose(fp); + + return -1; + } + fclose(fp); + + get_read_event_and_umask(cas_count_cfg, count); + + return 0; +}
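To make the flow above concrete: read_from_imc_dir() hands a sysfs string such as "event=0x04,umask=0x03" to get_read_event_and_umask(), and read_mem_bw_initialize_perf_event_attr() later packs it as umask << 8 | event. A stand-alone sketch of that round trip; the event/umask values are illustrative, the real ones come from events/cas_count_read:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            char cfg[] = "event=0x04,umask=0x03";	/* illustrative sysfs contents */
            unsigned long long event = 0, umask = 0;
            char *key = strtok(cfg, "=,");

            while (key) {
                    char *val = strtok(NULL, "=,");

                    if (!val)
                            break;
                    if (strcmp(key, "event") == 0)
                            event = strtoull(val, NULL, 16);
                    if (strcmp(key, "umask") == 0)
                            umask = strtoull(val, NULL, 16);
                    key = strtok(NULL, "=,");
            }
            /* Same packing as read_mem_bw_initialize_perf_event_attr(). */
            printf("pe.config = 0x%llx\n", umask << 8 | event);	/* 0x304 */
            return 0;
    }

+ +/* + * A system can have 'n' iMC (Integrated Memory Controller) + * counters; discover that 'n' and the properties of the available + * counters in support of the needed performance measurement via perf. + * For each iMC counter get its type and config. Also obtain each + * counter's event and umask for the memory read events that will be + * measured. + * + * Enumerate all these details into an array of structures. + * + * Return: >= 0 on success. < 0 on failure. + */ +static int num_of_imcs(void) +{ + char imc_dir[512], *temp; + unsigned int count = 0; + struct dirent *ep; + int ret; + DIR *dp; + + dp = opendir(DYN_PMU_PATH); + if (dp) { + while ((ep = readdir(dp))) { + temp = strstr(ep->d_name, UNCORE_IMC); + if (!temp) + continue; + + /* + * imc counters are named as "uncore_imc_<n>", hence + * increment the pointer to point to <n>. Note that + * sizeof(UNCORE_IMC) also counts the null terminator, + * which covers the trailing underscore character in + * "uncore_imc_". + */ + temp = temp + sizeof(UNCORE_IMC); + + /* + * Some directories under "DYN_PMU_PATH" could have + * names like "uncore_imc_free_running", hence, check if + * the first character is a numerical digit or not.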
+ */ + if (temp[0] >= '0' && temp[0] <= '9') { + sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH, + ep->d_name); + ret = read_from_imc_dir(imc_dir, count); + if (ret) { + closedir(dp); + + return ret; + } + count++; + } + } + closedir(dp); + if (count == 0) { + ksft_print_msg("Unable to find iMC counters\n"); + + return -1; + } + } else { + ksft_perror("Unable to open PMU directory"); + + return -1; + } + + return count; +} + +int initialize_read_mem_bw_imc(void) +{ + int imc; + + imcs = num_of_imcs(); + if (imcs <= 0) + return imcs; + + /* Initialize perf_event_attr structures for all iMC's */ + for (imc = 0; imc < imcs; imc++) + read_mem_bw_initialize_perf_event_attr(imc); + + return 0; +} + +static void perf_close_imc_read_mem_bw(void) +{ + int mc; + + for (mc = 0; mc < imcs; mc++) { + if (imc_counters_config[mc].fd != -1) + close(imc_counters_config[mc].fd); + } +} + +/* + * perf_open_imc_read_mem_bw - Open perf fds for IMCs + * @cpu_no: CPU number that the benchmark PID is bound to + * + * Return: = 0 on success. < 0 on failure. + */ +static int perf_open_imc_read_mem_bw(int cpu_no) +{ + int imc, ret; + + for (imc = 0; imc < imcs; imc++) + imc_counters_config[imc].fd = -1; + + for (imc = 0; imc < imcs; imc++) { + ret = open_perf_read_event(imc, cpu_no); + if (ret) + goto close_fds; + } + + return 0; + +close_fds: + perf_close_imc_read_mem_bw(); + return -1; +} + +/* + * do_imc_read_mem_bw_test - Perform memory bandwidth test + * + * Runs the memory bandwidth test over a one second period. Also handles + * starting and stopping of the IMC perf counters around the test. + */ +static void do_imc_read_mem_bw_test(void) +{ + int imc; + + for (imc = 0; imc < imcs; imc++) + read_mem_bw_ioctl_perf_event_ioc_reset_enable(imc); + + sleep(1); + + /* Stop counters after a second to get results. */ + for (imc = 0; imc < imcs; imc++) + read_mem_bw_ioctl_perf_event_ioc_disable(imc); +} + +/* + * get_read_mem_bw_imc - Memory read bandwidth as reported by iMC counters + * + * Memory read bandwidth utilized by a process on a socket can be calculated + * using iMC counters' read events. Perf events are used to read these + * counters. + * + * Return: = 0 on success. < 0 on failure. + */ +static int get_read_mem_bw_imc(float *bw_imc) +{ + float reads = 0, of_mul_read = 1; + int imc; + + /* + * Log read event values from all iMC counters into + * struct imc_counter_config. + * Take overflow into consideration before calculating total bandwidth.
+ */ + for (imc = 0; imc < imcs; imc++) { + struct imc_counter_config *r = + &imc_counters_config[imc]; + + if (read(r->fd, &r->return_value, + sizeof(struct membw_read_format)) == -1) { + ksft_perror("Couldn't get read bandwidth through iMC"); + return -1; + } + + __u64 r_time_enabled = r->return_value.time_enabled; + __u64 r_time_running = r->return_value.time_running; + + if (r_time_enabled != r_time_running) + of_mul_read = (float)r_time_enabled / + (float)r_time_running; + + reads += r->return_value.value * of_mul_read * SCALE; + } + + *bw_imc = reads; + return 0; +} + +/* + * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" + * @param: Parameters passed to resctrl_val() + * @domain_id: Domain ID (cache ID; for MB, L3 cache ID) + */ +void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, + int domain_id) +{ + sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, + param->ctrlgrp, domain_id); +} + +/* + * Open file to read MBM local bytes from resctrl FS + */ +static FILE *open_mem_bw_resctrl(const char *mbm_bw_file) +{ + FILE *fp; + + fp = fopen(mbm_bw_file, "r"); + if (!fp) + ksft_perror("Failed to open total memory bandwidth file"); + + return fp; +} + +/* + * Get MBM Local bytes as reported by resctrl FS + */ +static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total) +{ + if (fscanf(fp, "%lu\n", mbm_total) <= 0) { + ksft_perror("Could not get MBM local bytes"); + return -1; + } + return 0; +} + +static pid_t bm_pid; + +void ctrlc_handler(int signum, siginfo_t *info, void *ptr) +{ + /* Only kill the child after bm_pid has been set by fork() */ + if (bm_pid) + kill(bm_pid, SIGKILL); + umount_resctrlfs(); + if (current_test && current_test->cleanup) + current_test->cleanup(); + ksft_print_msg("Ending\n\n"); + + exit(EXIT_SUCCESS); +} + +/* + * Register a CTRL-C handler for the parent, as it has to kill the + * child process before exiting. + */ +int signal_handler_register(const struct resctrl_test *test) +{ + struct sigaction sigact = {}; + int ret = 0; + + bm_pid = 0; + + current_test = test; + sigact.sa_sigaction = ctrlc_handler; + sigemptyset(&sigact.sa_mask); + sigact.sa_flags = SA_SIGINFO; + if (sigaction(SIGINT, &sigact, NULL) || + sigaction(SIGTERM, &sigact, NULL) || + sigaction(SIGHUP, &sigact, NULL)) { + ksft_perror("sigaction"); + ret = -1; + } + return ret; +} + +/* + * Reset signal handler to SIG_DFL. + * The return type is void because the caller should keep its own + * error code even if sigaction() fails. + */ +void signal_handler_unregister(void) +{ + struct sigaction sigact = {}; + + current_test = NULL; + sigact.sa_handler = SIG_DFL; + sigemptyset(&sigact.sa_mask); + if (sigaction(SIGINT, &sigact, NULL) || + sigaction(SIGTERM, &sigact, NULL) || + sigaction(SIGHUP, &sigact, NULL)) { + ksft_perror("sigaction"); + } +}
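The time_enabled/time_running correction in get_read_mem_bw_imc() above compensates for perf counter multiplexing: when a counter was only scheduled for part of the measurement window, the raw count is scaled up by the enabled/running ratio. A stand-alone sketch with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            /* Illustrative values, as returned via PERF_FORMAT_TOTAL_TIME_* */
            unsigned long long value = 1000000;		/* raw count */
            unsigned long long time_enabled = 1000000000;	/* ns enabled */
            unsigned long long time_running = 250000000;	/* ns scheduled */
            double scaled = value;

            if (time_enabled != time_running && time_running)
                    scaled = (double)value * time_enabled / time_running;

            printf("estimated count: %.0f\n", scaled);	/* 4000000 */
            return 0;
    }

+ +/* + * print_results_bw: store the memory bandwidth results in a file + * @filename: file that stores the results + * @bm_pid: child pid that runs benchmark + * @bw_imc: perf imc counter value + * @bw_resc: memory bandwidth value + * + * Return: 0 on success, < 0 on error.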
+ */ +static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc, + unsigned long bw_resc) +{ + unsigned long diff = fabs(bw_imc - bw_resc); + FILE *fp; + + if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { + printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc); + printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff); + } else { + fp = fopen(filename, "a"); + if (!fp) { + ksft_perror("Cannot open results file"); + + return -1; + } + if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n", + (int)bm_pid, bw_imc, bw_resc, diff) <= 0) { + ksft_print_msg("Could not log results\n"); + fclose(fp); + + return -1; + } + fclose(fp); + } + + return 0; +} + +/* + * measure_read_mem_bw - Measures read memory bandwidth numbers while benchmark runs + * @uparams: User supplied parameters + * @param: Parameters passed to resctrl_val() + * @bm_pid: PID that runs the benchmark + * + * Measure memory bandwidth from resctrl and from another source, which + * here is the perf iMC value but could be something else if the perf iMC + * event is not available. Compare the two values to validate the resctrl + * value. It takes one second to measure the data. + * resctrl does not distinguish between read and write operations, so + * its data includes all memory operations. + */ +int measure_read_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + unsigned long bw_resc, bw_resc_start, bw_resc_end; + FILE *mem_bw_fp; + float bw_imc; + int ret; + + mem_bw_fp = open_mem_bw_resctrl(mbm_total_path); + if (!mem_bw_fp) + return -1; + + ret = perf_open_imc_read_mem_bw(uparams->cpu); + if (ret < 0) + goto close_fp; + + ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start); + if (ret < 0) + goto close_imc; + + rewind(mem_bw_fp); + + do_imc_read_mem_bw_test(); + + ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end); + if (ret < 0) + goto close_imc; + + ret = get_read_mem_bw_imc(&bw_imc); + if (ret < 0) + goto close_imc; + + perf_close_imc_read_mem_bw(); + fclose(mem_bw_fp); + + bw_resc = (bw_resc_end - bw_resc_start) / MB; + + return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); + +close_imc: + perf_close_imc_read_mem_bw(); +close_fp: + fclose(mem_bw_fp); + return ret; +} + +/* + * resctrl_val: execute the benchmark and measure memory bandwidth of + * the benchmark + * @test: test information structure + * @uparams: user supplied parameters + * @param: parameters passed to resctrl_val() + * + * Return: 0 when the test was run, < 0 on error. + */ +int resctrl_val(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *param) +{ + unsigned char *buf = NULL; + cpu_set_t old_affinity; + int domain_id; + int ret = 0; + pid_t ppid; + + if (strcmp(param->filename, "") == 0) + sprintf(param->filename, "stdio"); + + ret = get_domain_id(test->resource, uparams->cpu, &domain_id); + if (ret < 0) { + ksft_print_msg("Could not get domain ID\n"); + return ret; + } + + ppid = getpid(); + + /* Taskset test to specified CPU. */ + ret = taskset_benchmark(ppid, uparams->cpu, &old_affinity); + if (ret) + return ret; + + /* Write test to specified control & monitoring group in resctrl FS. */ + ret = write_bm_pid_to_resctrl(ppid, param->ctrlgrp, param->mongrp); + if (ret) + goto reset_affinity; + + if (param->init) { + ret = param->init(param, domain_id); + if (ret) + goto reset_affinity; + } + + /* + * If not running a user provided benchmark, run the default + * "fill_buf".
First phase of "fill_buf" is to prepare the + * buffer that the benchmark will operate on. No measurements + * are needed during this phase and the prepared memory will be + * passed to the next part of the benchmark via copy-on-write, so + * there is no impact on a benchmark that relies on reading from + * memory only. + */ + if (param->fill_buf) { + buf = alloc_buffer(param->fill_buf->buf_size, + param->fill_buf->memflush); + if (!buf) { + ret = -ENOMEM; + goto reset_affinity; + } + } + + fflush(stdout); + bm_pid = fork(); + if (bm_pid == -1) { + ret = -errno; + ksft_perror("Unable to fork"); + goto free_buf; + } + + /* + * What needs to be measured runs in a separate process until + * terminated. + */ + if (bm_pid == 0) { + if (param->fill_buf) + fill_cache_read(buf, param->fill_buf->buf_size, false); + else if (uparams->benchmark_cmd[0]) + execvp(uparams->benchmark_cmd[0], (char **)uparams->benchmark_cmd); + exit(EXIT_SUCCESS); + } + + ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); + + /* Give benchmark enough time to fully run. */ + sleep(1); + + /* Test runs until the callback setup() tells the test to stop. */ + while (1) { + ret = param->setup(test, uparams, param); + if (ret == END_OF_TESTS) { + ret = 0; + break; + } + if (ret < 0) + break; + + ret = param->measure(uparams, param, bm_pid); + if (ret) + break; + } + + kill(bm_pid, SIGKILL); +free_buf: + free(buf); +reset_affinity: + taskset_restore(ppid, &old_affinity); + return ret; +} diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c new file mode 100644 index 000000000000..195f04c4d158 --- /dev/null +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -0,0 +1,991 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic resctrl file system operations + * + * Copyright (C) 2018 Intel Corporation + * + * Authors: + * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> + */ +#include <fcntl.h> +#include <limits.h> + +#include "resctrl.h" + +int snc_unreliable; + +static int find_resctrl_mount(char *buffer) +{ + FILE *mounts; + char line[256], *fs, *mntpoint; + + mounts = fopen("/proc/mounts", "r"); + if (!mounts) { + ksft_perror("/proc/mounts"); + return -ENXIO; + } + while (!feof(mounts)) { + if (!fgets(line, 256, mounts)) + break; + fs = strtok(line, " \t"); + if (!fs) + continue; + mntpoint = strtok(NULL, " \t"); + if (!mntpoint) + continue; + fs = strtok(NULL, " \t"); + if (!fs) + continue; + if (strcmp(fs, "resctrl")) + continue; + + fclose(mounts); + if (buffer) + strncpy(buffer, mntpoint, 256); + + return 0; + } + + fclose(mounts); + + return -ENOENT; +}
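find_resctrl_mount() above walks /proc/mounts and keys off the third whitespace-separated field, the filesystem type. A stand-alone sketch of that parse against a canned mount line:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* Illustrative /proc/mounts line. */
            char line[] = "resctrl /sys/fs/resctrl resctrl rw,relatime 0 0";
            char *fs = strtok(line, " \t");			/* device */
            char *mntpoint = fs ? strtok(NULL, " \t") : NULL;	/* mount point */

            fs = mntpoint ? strtok(NULL, " \t") : NULL;		/* fs type */
            if (fs && strcmp(fs, "resctrl") == 0)
                    printf("resctrl mounted at %s\n", mntpoint);
            return 0;
    }

+ +/* + * mount_resctrlfs - Mount resctrl FS at /sys/fs/resctrl + * + * Mounts resctrl FS. Fails if resctrl FS is already mounted to avoid + * pre-existing settings interfering with the test results. + * + * Return: 0 on success, < 0 on error.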
+ */ +int mount_resctrlfs(void) +{ + int ret; + + ret = find_resctrl_mount(NULL); + if (ret != -ENOENT) + return -1; + + ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH); + ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL); + if (ret) + ksft_perror("mount"); + + return ret; +} + +int umount_resctrlfs(void) +{ + char mountpoint[256]; + int ret; + + ret = find_resctrl_mount(mountpoint); + if (ret == -ENOENT) + return 0; + if (ret) + return ret; + + if (umount(mountpoint)) { + ksft_perror("Unable to umount resctrl"); + + return -1; + } + + return 0; +} + +/* + * get_cache_level - Convert cache level from string to integer + * @cache_type: Cache level as string + * + * Return: cache level as integer or -1 if @cache_type is invalid. + */ +static int get_cache_level(const char *cache_type) +{ + if (!strcmp(cache_type, "L3")) + return 3; + if (!strcmp(cache_type, "L2")) + return 2; + + ksft_print_msg("Invalid cache level\n"); + return -1; +} + +static int get_resource_cache_level(const char *resource) +{ + /* "MB" uses L3 (LLC) as resource */ + if (!strcmp(resource, "MB")) + return 3; + return get_cache_level(resource); +} + +/* + * get_domain_id - Get resctrl domain ID for a specified CPU + * @resource: resource name + * @cpu_no: CPU number + * @domain_id: domain ID (cache ID; for MB, L3 cache ID) + * + * Return: >= 0 on success, < 0 on failure. + */ +int get_domain_id(const char *resource, int cpu_no, int *domain_id) +{ + char phys_pkg_path[1024]; + int cache_num; + FILE *fp; + + cache_num = get_resource_cache_level(resource); + if (cache_num < 0) + return cache_num; + + sprintf(phys_pkg_path, "%s%d/cache/index%d/id", PHYS_ID_PATH, cpu_no, cache_num); + + fp = fopen(phys_pkg_path, "r"); + if (!fp) { + ksft_perror("Failed to open cache id file"); + + return -1; + } + if (fscanf(fp, "%d", domain_id) <= 0) { + ksft_perror("Could not get domain ID"); + fclose(fp); + + return -1; + } + fclose(fp); + + return 0; +} + +/* + * Count number of CPUs in a /sys bitmap + */ +static unsigned int count_sys_bitmap_bits(char *name) +{ + FILE *fp = fopen(name, "r"); + int count = 0, c; + + if (!fp) + return 0; + + while ((c = fgetc(fp)) != EOF) { + if (!isxdigit(c)) + continue; + switch (c) { + case 'f': + count++; + fallthrough; + case '7': case 'b': case 'd': case 'e': + count++; + fallthrough; + case '3': case '5': case '6': case '9': case 'a': case 'c': + count++; + fallthrough; + case '1': case '2': case '4': case '8': + count++; + break; + } + } + fclose(fp); + + return count; +} + +static bool cpus_offline_empty(void) +{ + char offline_cpus_str[64]; + FILE *fp; + + fp = fopen("/sys/devices/system/cpu/offline", "r"); + if (!fp) { + ksft_perror("Could not open /sys/devices/system/cpu/offline"); + return 0; + } + + if (fscanf(fp, "%63s", offline_cpus_str) < 0) { + if (!errno) { + fclose(fp); + return 1; + } + ksft_perror("Could not read /sys/devices/system/cpu/offline"); + } + + fclose(fp); + + return 0; +}
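The fallthrough switch in count_sys_bitmap_bits() above is a per-hex-digit popcount. An equivalent stand-alone sketch, run on a made-up cpumap string:

    #include <ctype.h>
    #include <stdio.h>

    int main(void)
    {
            const char *map = "f,00000003";	/* illustrative cpumap contents */
            unsigned int count = 0;

            for (; *map; map++) {
                    int v;

                    if (!isxdigit((unsigned char)*map))
                            continue;
                    v = isdigit((unsigned char)*map) ? *map - '0'
                                    : tolower((unsigned char)*map) - 'a' + 10;
                    for (; v; v >>= 1)	/* popcount of one hex digit */
                            count += v & 1;
            }
            printf("%u CPUs\n", count);	/* 4 + 2 = 6 */
            return 0;
    }

+ +/* + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0. + * If any CPUs are offline, declare the detection as unreliable.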
+ */ +int snc_nodes_per_l3_cache(void) +{ + int node_cpus, cache_cpus; + static int snc_mode; + + if (!snc_mode) { + snc_mode = 1; + if (!cpus_offline_empty()) { + ksft_print_msg("Runtime SNC detection unreliable due to offline CPUs.\n"); + ksft_print_msg("Setting SNC mode to disabled.\n"); + snc_unreliable = 1; + return snc_mode; + } + node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap"); + cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map"); + + if (!node_cpus || !cache_cpus) { + ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n"); + snc_unreliable = 1; + return snc_mode; + } + snc_mode = cache_cpus / node_cpus; + + if (snc_mode > 1) + ksft_print_msg("SNC-%d mode discovered.\n", snc_mode); + } + + return snc_mode; +} + +/* + * get_cache_size - Get cache size for a specified CPU + * @cpu_no: CPU number + * @cache_type: Cache level L2/L3 + * @cache_size: pointer to cache_size + * + * Return: = 0 on success, < 0 on failure. + */ +int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size) +{ + char cache_path[1024], cache_str[64]; + int length, i, cache_num; + FILE *fp; + + cache_num = get_cache_level(cache_type); + if (cache_num < 0) + return cache_num; + + sprintf(cache_path, "/sys/bus/cpu/devices/cpu%d/cache/index%d/size", + cpu_no, cache_num); + fp = fopen(cache_path, "r"); + if (!fp) { + ksft_perror("Failed to open cache size"); + + return -1; + } + if (fscanf(fp, "%63s", cache_str) <= 0) { + ksft_perror("Could not get cache_size"); + fclose(fp); + + return -1; + } + fclose(fp); + + length = (int)strlen(cache_str); + + *cache_size = 0; + + for (i = 0; i < length; i++) { + if ((cache_str[i] >= '0') && (cache_str[i] <= '9')) + *cache_size = *cache_size * 10 + (cache_str[i] - '0'); + else if (cache_str[i] == 'K') + *cache_size = *cache_size * 1024; + else if (cache_str[i] == 'M') + *cache_size = *cache_size * 1024 * 1024; + else + break; + } + + /* + * The amount of cache represented by each bit in the masks + * in the schemata file is reduced by a factor equal to SNC + * nodes per L3 cache. + * E.g. on a SNC-2 system with a 100MB L3 cache a test that + * allocates memory from its local SNC node (default behavior + * without using libnuma) will only see 50 MB llc_occupancy + * with a fully populated L3 mask in the schemata file. + */ + if (cache_num == 3) + *cache_size /= snc_nodes_per_l3_cache(); + return 0; +} + +#define CORE_SIBLINGS_PATH "/sys/bus/cpu/devices/cpu" + +/* + * get_bit_mask - Get bit mask from given file + * @filename: File containing the mask + * @mask: The bit mask returned as unsigned long + * + * Return: = 0 on success, < 0 on failure. + */ +static int get_bit_mask(const char *filename, unsigned long *mask) +{ + FILE *fp; + + if (!filename || !mask) + return -1; + + fp = fopen(filename, "r"); + if (!fp) { + ksft_print_msg("Failed to open bit mask file '%s': %s\n", + filename, strerror(errno)); + return -1; + } + + if (fscanf(fp, "%lx", mask) <= 0) { + ksft_print_msg("Could not read bit mask file '%s': %s\n", + filename, strerror(errno)); + fclose(fp); + + return -1; + } + fclose(fp); + + return 0; +}
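get_cache_size() above accumulates the digits of a sysfs size string such as "512K" and then applies the suffix multiplier. A stand-alone sketch of the same parse:

    #include <ctype.h>
    #include <stdio.h>

    static unsigned long parse_cache_size(const char *s)
    {
            unsigned long size = 0;

            for (; *s; s++) {
                    if (isdigit((unsigned char)*s))
                            size = size * 10 + (*s - '0');
                    else if (*s == 'K')
                            size *= 1024;
                    else if (*s == 'M')
                            size *= 1024 * 1024;
                    else
                            break;
            }
            return size;
    }

    int main(void)
    {
            printf("%lu\n", parse_cache_size("512K"));	/* 524288 */
            return 0;
    }

+ +/* + * resource_info_unsigned_get - Read an unsigned value from + * /sys/fs/resctrl/info/@resource/@filename + * @resource: Resource name that matches directory name in + * /sys/fs/resctrl/info + * @filename: File in /sys/fs/resctrl/info/@resource + * @val: Contains read value on success. + * + * Return: = 0 on success, < 0 on failure.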
On success the read + * value is saved into @val. + */ +int resource_info_unsigned_get(const char *resource, const char *filename, + unsigned int *val) +{ + char file_path[PATH_MAX]; + FILE *fp; + + snprintf(file_path, sizeof(file_path), "%s/%s/%s", INFO_PATH, resource, + filename); + + fp = fopen(file_path, "r"); + if (!fp) { + ksft_print_msg("Error opening %s: %m\n", file_path); + return -1; + } + + if (fscanf(fp, "%u", val) <= 0) { + ksft_print_msg("Could not get contents of %s: %m\n", file_path); + fclose(fp); + return -1; + } + + fclose(fp); + return 0; +} + +/* + * create_bit_mask - Create bit mask from start, len pair + * @start: LSB of the mask + * @len: Number of bits in the mask + */ +unsigned long create_bit_mask(unsigned int start, unsigned int len) +{ + return ((1UL << len) - 1UL) << start; +} + +/* + * count_contiguous_bits - Returns the longest train of bits in a bit mask + * @val: A bit mask + * @start: The location of the least-significant bit of the longest train + * + * Return: The length of the contiguous bits in the longest train of bits + */ +unsigned int count_contiguous_bits(unsigned long val, unsigned int *start) +{ + unsigned long last_val; + unsigned int count = 0; + + while (val) { + last_val = val; + val &= (val >> 1); + count++; + } + + if (start) { + if (count) + *start = ffsl(last_val) - 1; + else + *start = 0; + } + + return count; +} + +/* + * get_full_cbm - Get full Cache Bit Mask (CBM) + * @cache_type: Cache type as "L2" or "L3" + * @mask: Full cache bit mask representing the maximal portion of cache + * available for allocation, returned as unsigned long. + * + * Return: = 0 on success, < 0 on failure. + */ +int get_full_cbm(const char *cache_type, unsigned long *mask) +{ + char cbm_path[PATH_MAX]; + int ret; + + if (!cache_type) + return -1; + + snprintf(cbm_path, sizeof(cbm_path), "%s/%s/cbm_mask", + INFO_PATH, cache_type); + + ret = get_bit_mask(cbm_path, mask); + if (ret || !*mask) + return -1; + + return 0; +} + +/* + * get_shareable_mask - Get shareable mask from shareable_bits + * @cache_type: Cache type as "L2" or "L3" + * @shareable_mask: Shareable mask returned as unsigned long + * + * Return: = 0 on success, < 0 on failure. + */ +static int get_shareable_mask(const char *cache_type, unsigned long *shareable_mask) +{ + char mask_path[PATH_MAX]; + + if (!cache_type) + return -1; + + snprintf(mask_path, sizeof(mask_path), "%s/%s/shareable_bits", + INFO_PATH, cache_type); + + return get_bit_mask(mask_path, shareable_mask); +} + +/* + * get_mask_no_shareable - Get Cache Bit Mask (CBM) without shareable bits + * @cache_type: Cache type as "L2" or "L3" + * @mask: The largest exclusive portion of the cache out of the + * full CBM, returned as unsigned long + * + * Parts of a cache may be shared with other devices such as a GPU. This function + * calculates the largest exclusive portion of the cache where no other devices + * besides the CPU have access to the cache portion. + * + * Return: = 0 on success, < 0 on failure. + */ +int get_mask_no_shareable(const char *cache_type, unsigned long *mask) +{ + unsigned long full_mask, shareable_mask; + unsigned int start, len; + + if (get_full_cbm(cache_type, &full_mask) < 0) + return -1; + if (get_shareable_mask(cache_type, &shareable_mask) < 0) + return -1; + + len = count_contiguous_bits(full_mask & ~shareable_mask, &start); + if (!len) + return -1; + + *mask = create_bit_mask(start, len); + + return 0; +}
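The trick in count_contiguous_bits() above: AND-ing a mask with itself shifted right by one strips one bit off every run of ones, so the number of iterations until zero equals the length of the longest run, and the last non-zero value marks that run's least-significant bit. A stand-alone sketch (ffsl() needs _GNU_SOURCE with glibc):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>	/* ffsl() */

    int main(void)
    {
            unsigned long val = 0x3f3;	/* runs at bits 0-1 and bits 4-9 */
            unsigned long last_val = 0;
            unsigned int count = 0;

            while (val) {
                    last_val = val;
                    val &= val >> 1;	/* shorten every run of ones by one bit */
                    count++;
            }
            printf("longest run: %u bits starting at bit %d\n",
                   count, count ? ffsl(last_val) - 1 : 0);	/* 6 bits at bit 4 */
            return 0;
    }

+ +/* + * taskset_benchmark - Taskset PID (i.e.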
benchmark) to a specified cpu + * @bm_pid: PID that should be bound + * @cpu_no: CPU number to which the PID should be bound + * @old_affinity: When not NULL, set to old CPU affinity + * + * Return: 0 on success, < 0 on error. + */ +int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity) +{ + cpu_set_t my_set; + + if (old_affinity) { + CPU_ZERO(old_affinity); + if (sched_getaffinity(bm_pid, sizeof(*old_affinity), + old_affinity)) { + ksft_perror("Unable to read CPU affinity"); + return -1; + } + } + + CPU_ZERO(&my_set); + CPU_SET(cpu_no, &my_set); + + if (sched_setaffinity(bm_pid, sizeof(cpu_set_t), &my_set)) { + ksft_perror("Unable to taskset benchmark"); + + return -1; + } + + return 0; +} + +/* + * taskset_restore - Taskset PID to the earlier CPU affinity + * @bm_pid: PID that should be reset + * @old_affinity: The old CPU affinity to restore + * + * Return: 0 on success, < 0 on error. + */ +int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity) +{ + if (sched_setaffinity(bm_pid, sizeof(*old_affinity), old_affinity)) { + ksft_perror("Unable to restore CPU affinity"); + return -1; + } + + return 0; +} + +/* + * create_grp - Create a group only if one doesn't exist + * @grp_name: Name of the group + * @grp: Full path and name of the group + * @parent_grp: Full path and name of the parent group + * + * Creates a group @grp_name if it does not exist yet. If @grp_name is NULL, + * it is interpreted as the root group which always results in success. + * + * Return: 0 on success, < 0 on error. + */ +static int create_grp(const char *grp_name, char *grp, const char *parent_grp) +{ + int found_grp = 0; + struct dirent *ep; + DIR *dp; + + if (!grp_name) + return 0; + + /* Check if requested grp exists or not */ + dp = opendir(parent_grp); + if (dp) { + while ((ep = readdir(dp)) != NULL) { + if (strcmp(ep->d_name, grp_name) == 0) + found_grp = 1; + } + closedir(dp); + } else { + ksft_perror("Unable to open resctrl for group"); + + return -1; + } + + /* Requested grp doesn't exist, hence create it */ + if (found_grp == 0) { + if (mkdir(grp, 0) == -1) { + ksft_perror("Unable to create group"); + + return -1; + } + } + + return 0; +} + +static int write_pid_to_tasks(char *tasks, pid_t pid) +{ + FILE *fp; + + fp = fopen(tasks, "w"); + if (!fp) { + ksft_perror("Failed to open tasks file"); + + return -1; + } + if (fprintf(fp, "%d\n", (int)pid) < 0) { + ksft_print_msg("Failed to write pid to tasks file\n"); + fclose(fp); + + return -1; + } + fclose(fp); + + return 0; +} + +/* + * write_bm_pid_to_resctrl - Write a PID (i.e. benchmark) to resctrl FS + * @bm_pid: PID that should be written + * @ctrlgrp: Name of the control monitor group (con_mon grp) + * @mongrp: Name of the monitor group (mon grp) + * + * If a con_mon grp is requested, create it and write pid to it, otherwise + * write pid to root con_mon grp. + * If a mon grp is requested, create it and write pid to it, otherwise + * the pid is not written; it stays in the con_mon grp and results should + * be read from the con_mon grp's mon_data directory. + * + * Return: 0 on success, < 0 on error.
+ */ +int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp) +{ + char controlgroup[128], monitorgroup[512], monitorgroup_p[256]; + char tasks[1024]; + int ret = 0; + + if (ctrlgrp) + sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp); + else + sprintf(controlgroup, "%s", RESCTRL_PATH); + + /* Create control and monitoring group and write pid into it */ + ret = create_grp(ctrlgrp, controlgroup, RESCTRL_PATH); + if (ret) + goto out; + sprintf(tasks, "%s/tasks", controlgroup); + ret = write_pid_to_tasks(tasks, bm_pid); + if (ret) + goto out; + + /* Create monitor group and write pid into it if it is used */ + if (mongrp) { + sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); + sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); + ret = create_grp(mongrp, monitorgroup, monitorgroup_p); + if (ret) + goto out; + + sprintf(tasks, "%s/mon_groups/%s/tasks", + controlgroup, mongrp); + ret = write_pid_to_tasks(tasks, bm_pid); + if (ret) + goto out; + } + +out: + ksft_print_msg("Writing benchmark parameters to resctrl FS\n"); + if (ret) + ksft_print_msg("Failed writing to resctrlfs\n"); + + return ret; +} + +/* + * write_schemata - Update schemata of a con_mon grp + * @ctrlgrp: Name of the con_mon grp + * @schemata: Schemata that should be updated to + * @cpu_no: CPU number that the benchmark PID is bound to + * @resource: Resctrl resource (e.g. MB, L3, L2) + * + * Update schemata of a con_mon grp *only* if the requested resctrl resource + * is an allocation type. + * + * Return: 0 on success, < 0 on error. + */ +int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, + const char *resource) +{ + char controlgroup[1024], reason[128], schema[1024] = {}; + int domain_id, fd, schema_len, ret = 0; + + if (!schemata) { + ksft_print_msg("Skipping empty schemata update\n"); + + return -1; + } + + if (get_domain_id(resource, cpu_no, &domain_id) < 0) { + sprintf(reason, "Failed to get domain ID"); + ret = -1; + + goto out; + } + + if (ctrlgrp) + sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp); + else + sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); + + schema_len = snprintf(schema, sizeof(schema), "%s:%d=%s\n", + resource, domain_id, schemata); + if (schema_len < 0 || schema_len >= sizeof(schema)) { + snprintf(reason, sizeof(reason), + "snprintf() failed with return value : %d", schema_len); + ret = -1; + goto out; + } + + fd = open(controlgroup, O_WRONLY); + if (fd < 0) { + snprintf(reason, sizeof(reason), + "open() failed : %s", strerror(errno)); + ret = -1; + + goto err_schema_not_empty; + } + if (write(fd, schema, schema_len) < 0) { + snprintf(reason, sizeof(reason), + "write() failed : %s", strerror(errno)); + close(fd); + ret = -1; + + goto err_schema_not_empty; + } + close(fd); + +err_schema_not_empty: + schema[schema_len - 1] = 0; +out: + ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n", + schema, ret ? " # " : "", + ret ? reason : ""); + + return ret; +}
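The schemata line that write_schemata() above assembles has the form "<resource>:<domain_id>=<value>"; for example, throttling resource "MB" to 50% of bandwidth on domain 0 yields "MB:0=50". A minimal sketch, with illustrative domain and value:

    #include <stdio.h>

    int main(void)
    {
            char schema[1024];
            int len;

            /* Hypothetical values: resource MB, domain 0, 50% bandwidth. */
            len = snprintf(schema, sizeof(schema), "%s:%d=%s\n", "MB", 0, "50");
            if (len < 0 || len >= (int)sizeof(schema))
                    return 1;
            /* This string would be written to /sys/fs/resctrl/<grp>/schemata. */
            fputs(schema, stdout);
            return 0;
    }

+ +bool check_resctrlfs_support(void) +{ + FILE *inf = fopen("/proc/filesystems", "r"); + DIR *dp; + char *res; + bool ret = false; + + if (!inf) + return false; + + res = fgrep(inf, "nodev\tresctrl\n"); + + if (res) { + ret = true; + free(res); + } + + fclose(inf); + + ksft_print_msg("%s Check kernel supports resctrl filesystem\n", + ret ? "Pass:" : "Fail:"); + + if (!ret) + return ret; + + dp = opendir(RESCTRL_PATH); + ksft_print_msg("%s Check resctrl mountpoint \"%s\" exists\n", + dp ?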
"Pass:" : "Fail:", RESCTRL_PATH); + if (dp) + closedir(dp); + + ksft_print_msg("resctrl filesystem %s mounted\n", + find_resctrl_mount(NULL) ? "not" : "is"); + + return ret; +} + +char *fgrep(FILE *inf, const char *str) +{ + char line[256]; + int slen = strlen(str); + + while (!feof(inf)) { + if (!fgets(line, 256, inf)) + break; + if (strncmp(line, str, slen)) + continue; + + return strdup(line); + } + + return NULL; +} + +/* + * resctrl_resource_exists - Check if a resource is supported. + * @resource: Resctrl resource (e.g., MB, L3, L2, L3_MON, etc.) + * + * Return: True if the resource is supported, else false. False is + * also returned if resctrl FS is not mounted. + */ +bool resctrl_resource_exists(const char *resource) +{ + char res_path[PATH_MAX]; + struct stat statbuf; + int ret; + + if (!resource) + return false; + + ret = find_resctrl_mount(NULL); + if (ret) + return false; + + snprintf(res_path, sizeof(res_path), "%s/%s", INFO_PATH, resource); + + if (stat(res_path, &statbuf)) + return false; + + return true; +} + +/* + * resctrl_mon_feature_exists - Check if requested monitoring feature is valid. + * @resource: Resource that uses the mon_features file. Currently only L3_MON + * is valid. + * @feature: Required monitor feature (in mon_features file). + * + * Return: True if the feature is supported, else false. + */ +bool resctrl_mon_feature_exists(const char *resource, const char *feature) +{ + char res_path[PATH_MAX]; + char *res; + FILE *inf; + + if (!feature || !resource) + return false; + + snprintf(res_path, sizeof(res_path), "%s/%s/mon_features", INFO_PATH, resource); + inf = fopen(res_path, "r"); + if (!inf) + return false; + + res = fgrep(inf, feature); + free(res); + fclose(inf); + + return !!res; +} + +/* + * resource_info_file_exists - Check if a file is present inside + * /sys/fs/resctrl/info/@resource. + * @resource: Required resource (Eg: MB, L3, L2, etc.) + * @file: Required file. + * + * Return: True if the /sys/fs/resctrl/info/@resource/@file exists, else false. 
+ */ +bool resource_info_file_exists(const char *resource, const char *file) +{ + char res_path[PATH_MAX]; + struct stat statbuf; + + if (!file || !resource) + return false; + + snprintf(res_path, sizeof(res_path), "%s/%s/%s", INFO_PATH, resource, + file); + + if (stat(res_path, &statbuf)) + return false; + + return true; +} + +bool test_resource_feature_check(const struct resctrl_test *test) +{ + return resctrl_resource_exists(test->resource); +} + +int filter_dmesg(void) +{ + char line[1024]; + FILE *fp; + int pipefds[2]; + pid_t pid; + int ret; + + ret = pipe(pipefds); + if (ret) { + ksft_perror("pipe"); + return ret; + } + fflush(stdout); + pid = fork(); + if (pid == 0) { + close(pipefds[0]); + dup2(pipefds[1], STDOUT_FILENO); + execlp("dmesg", "dmesg", NULL); + ksft_perror("Executing dmesg"); + exit(1); + } + close(pipefds[1]); + fp = fdopen(pipefds[0], "r"); + if (!fp) { + ksft_perror("fdopen(pipe)"); + kill(pid, SIGTERM); + + return -1; + } + + while (fgets(line, 1024, fp)) { + if (strstr(line, "intel_rdt:")) + ksft_print_msg("dmesg: %s", line); + if (strstr(line, "resctrl:")) + ksft_print_msg("dmesg: %s", line); + } + fclose(fp); + waitpid(pid, NULL, 0); + + return 0; +} + +int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, + int group_fd, unsigned long flags) +{ + int ret; + + ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, + group_fd, flags); + return ret; +} + +unsigned int count_bits(unsigned long n) +{ + unsigned int count = 0; + + while (n) { + count += n & 1; + n >>= 1; + } + + return count; +} + +/** + * snc_kernel_support - Check for existence of mon_sub_L3_00 file that indicates + * SNC resctrl support on the kernel side. + * + * Return: 0 if the kernel does not support SNC, 1 if SNC is disabled, + * if SNC discovery was unreliable, or if SNC is both enabled and supported. + */ +int snc_kernel_support(void) +{ + char node_path[PATH_MAX]; + struct stat statbuf; + int ret; + + ret = snc_nodes_per_l3_cache(); + /* + * If SNC is disabled then its kernel support isn't important. If SNC + * got disabled because the discovery process was unreliable the + * snc_unreliable variable was set. It can be used to verify the SNC + * discovery reliability elsewhere in the selftest. + */ + if (ret == 1) + return ret; + + snprintf(node_path, sizeof(node_path), "%s/%s", RESCTRL_PATH, + "mon_data/mon_L3_00/mon_sub_L3_00"); + + if (!stat(node_path, &statbuf)) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/resctrl/settings b/tools/testing/selftests/resctrl/settings new file mode 100644 index 000000000000..a383f3d4565b --- /dev/null +++ b/tools/testing/selftests/resctrl/settings @@ -0,0 +1,3 @@ +# If running time is longer than 120 seconds when new tests are added in +# the future, increase timeout here. +timeout=120
