From d76bb7a09bb3b8711077912f3e80cfcf39cd9d0b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 27 May 2020 00:38:38 -0400 Subject: tools/power turbostat: Print /dev/cpu_dma_latency Users are puzzled when they use tuned performance and all their C-states vanish. Dump /dev/cpu_dma_latency and state whether the value is default, or constraining, to explain this situation. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 33b370865d16..4c679568fda4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4698,6 +4698,32 @@ unsigned int intel_model_duplicates(unsigned int model) } return model; } + +void print_dev_latency(void) +{ + char *path = "/dev/cpu_dma_latency"; + int fd; + int value; + int retval; + + fd = open(path, O_RDONLY); + if (fd < 0) { + warn("fopen %s\n", path); + return; + } + + retval = read(fd, (void *)&value, sizeof(int)); + if (retval != sizeof(int)) { + warn("read %s\n", path); + close(fd); + return; + } + fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", + value, value == 2000000000 ? "default" : "constrained"); + + close(fd); +} + void process_cpuid() { unsigned int eax, ebx, ecx, edx; @@ -4966,6 +4992,8 @@ void process_cpuid() if (!quiet) dump_cstate_pstate_config_info(family, model); + if (!quiet) + print_dev_latency(); if (!quiet) dump_sysfs_cstate_config(); if (!quiet) -- cgit From 9aefc2cda6353f48708415d9adc5dff4deb73412 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Thu, 26 Mar 2020 13:36:37 -0700 Subject: tools/power turbostat: Always print idle in the system configuration header If the --quiet option is not used, turbostat prints a useful system configuration header during startup. But inclusion of idle system configuration information in this header is currently a function of inclusion in the columns chosen to be displayed. Always list this idle system configuration. Signed-off-by: Doug Smythies Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4c679568fda4..4edad3fc760a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3530,9 +3530,6 @@ dump_sysfs_cstate_config(void) int state; char *sp; - if (!DO_BIC(BIC_sysfs)) - return; - if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { fprintf(outf, "cpuidle not loaded\n"); return; -- cgit From 7c2ccc507bd44d17227930181f937b2066565349 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:31:47 +0800 Subject: tools/power turbostat: Make the energy variable to be 64 bit Change the energy variable from 32bit to 64bit, so that it can record long time duration. After this conversion, adjust the DELTA_WRAP32() accordingly. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4edad3fc760a..29ec1018852d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -211,12 +211,12 @@ struct pkg_data { long long gfx_rc6_ms; unsigned int gfx_mhz; unsigned int package_id; - unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ - unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ - unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ - unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ - unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ - unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ + unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ + unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ + unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */ + unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */ + unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ + unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned long long counter[MAX_ADDED_COUNTERS]; } *package_even, *package_odd; @@ -858,13 +858,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "pc10: %016llX\n", p->pc10); outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); - outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); - outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); - outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); - outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram); - outp += sprintf(outp, "Throttle PKG: %0X\n", + outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg); + outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores); + outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx); + outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram); + outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status); - outp += sprintf(outp, "Throttle RAM: %0X\n", + outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); @@ -1210,11 +1210,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ } #define DELTA_WRAP32(new, old) \ - if (new > old) { \ - old = new - old; \ - } else { \ - old = 0x100000000 + new - old; \ - } + old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); int delta_package(struct pkg_data *new, struct pkg_data *old) -- cgit From 87e15da95775a2ffb8c444e84f08ca982b758364 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:31:57 +0800 Subject: tools/power turbostat: Introduce functions to accumulate RAPL consumption Since the RAPL Joule Counter is 32 bit, turbostat would only print a *star* instead of printing the actual energy consumed to indicate the overflow due to long duration. This does not meet the requirement from servers as the sampling time of turbostat is usually very long on servers. So maintain a set of MSR buffer, and update them periodically before the 32bit MSR register is wrapped round, so as to avoid the overflow. The idea is similar to the implementation of ktime_get(): Periodical MSR timer: total_rapl_sum += (current_rapl_msr - last_rapl_msr); Using get_msr_sum() to get the accumulated RAPL: return (current_rapl_msr - last_rapl_msr) + total_rapl_sum; The accumulated RAPL mechanism will be turned on in next patch. Originally-by: Aaron Lu Reviewed-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 2 +- tools/power/x86/turbostat/turbostat.c | 224 +++++++++++++++++++++++++++++++++- 2 files changed, 219 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 2b6551269e43..d08765531bcb 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -16,7 +16,7 @@ override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c @mkdir -p $(BUILD_OUTPUT) - $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap -lrt .PHONY : clean clean : diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 29ec1018852d..c1759f6c84a8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -259,6 +259,113 @@ struct msr_counter { #define SYSFS_PERCPU (1 << 1) }; +/* + * The accumulated sum of MSR is defined as a monotonic + * increasing MSR, it will be accumulated periodically, + * despite its register's bit width. + */ +enum { + IDX_PKG_ENERGY, + IDX_DRAM_ENERGY, + IDX_PP0_ENERGY, + IDX_PP1_ENERGY, + IDX_PKG_PERF, + IDX_DRAM_PERF, + IDX_COUNT, +}; + +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); + +struct msr_sum_array { + /* get_msr_sum() = sum + (get_msr() - last) */ + struct { + /*The accumulated MSR value is updated by the timer*/ + unsigned long long sum; + /*The MSR footprint recorded in last timer*/ + unsigned long long last; + } entries[IDX_COUNT]; +}; + +/* The percpu MSR sum array.*/ +struct msr_sum_array *per_cpu_msr_sum; + +int idx_to_offset(int idx) +{ + int offset; + + switch (idx) { + case IDX_PKG_ENERGY: + offset = MSR_PKG_ENERGY_STATUS; + break; + case IDX_DRAM_ENERGY: + offset = MSR_DRAM_ENERGY_STATUS; + break; + case IDX_PP0_ENERGY: + offset = MSR_PP0_ENERGY_STATUS; + break; + case IDX_PP1_ENERGY: + offset = MSR_PP1_ENERGY_STATUS; + break; + case IDX_PKG_PERF: + offset = MSR_PKG_PERF_STATUS; + break; + case IDX_DRAM_PERF: + offset = MSR_DRAM_PERF_STATUS; + break; + default: + offset = -1; + } + return offset; +} + +int offset_to_idx(int offset) +{ + int idx; + + switch (offset) { + case MSR_PKG_ENERGY_STATUS: + idx = IDX_PKG_ENERGY; + break; + case MSR_DRAM_ENERGY_STATUS: + idx = IDX_DRAM_ENERGY; + break; + case MSR_PP0_ENERGY_STATUS: + idx = IDX_PP0_ENERGY; + break; + case MSR_PP1_ENERGY_STATUS: + idx = IDX_PP1_ENERGY; + break; + case MSR_PKG_PERF_STATUS: + idx = IDX_PKG_PERF; + break; + case MSR_DRAM_PERF_STATUS: + idx = IDX_DRAM_PERF; + break; + default: + idx = -1; + } + return idx; +} + +int idx_valid(int idx) +{ + switch (idx) { + case IDX_PKG_ENERGY: + return do_rapl & RAPL_PKG; + case IDX_DRAM_ENERGY: + return do_rapl & RAPL_DRAM; + case IDX_PP0_ENERGY: + return do_rapl & RAPL_CORES_ENERGY_STATUS; + case IDX_PP1_ENERGY: + return do_rapl & RAPL_GFX; + case IDX_PKG_PERF: + return do_rapl & RAPL_PKG_PERF_STATUS; + case IDX_DRAM_PERF: + return do_rapl & RAPL_DRAM_PERF_STATUS; + default: + return 0; + } +} struct sys_counters { unsigned int added_thread_counters; unsigned int added_core_counters; @@ -1250,12 +1357,12 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->gfx_mhz = new->gfx_mhz; - DELTA_WRAP32(new->energy_pkg, old->energy_pkg); - DELTA_WRAP32(new->energy_cores, old->energy_cores); - DELTA_WRAP32(new->energy_gfx, old->energy_gfx); - DELTA_WRAP32(new->energy_dram, old->energy_dram); - DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); - DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); + old->energy_pkg = new->energy_pkg - old->energy_pkg; + old->energy_cores = new->energy_cores - old->energy_cores; + old->energy_gfx = new->energy_gfx - old->energy_gfx; + old->energy_dram = new->energy_dram - old->energy_dram; + old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status; + old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -3053,6 +3160,111 @@ void do_sleep(void) } } +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) +{ + int ret, idx; + unsigned long long msr_cur, msr_last; + + if (!per_cpu_msr_sum) + return 1; + + idx = offset_to_idx(offset); + if (idx < 0) + return idx; + /* get_msr_sum() = sum + (get_msr() - last) */ + ret = get_msr(cpu, offset, &msr_cur); + if (ret) + return ret; + msr_last = per_cpu_msr_sum[cpu].entries[idx].last; + DELTA_WRAP32(msr_cur, msr_last); + *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; + + return 0; +} + +timer_t timerid; + +/* Timer callback, update the sum of MSRs periodically. */ +static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i, ret; + int cpu = t->cpu_id; + + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { + unsigned long long msr_cur, msr_last; + int offset; + + if (!idx_valid(i)) + continue; + offset = idx_to_offset(i); + if (offset < 0) + continue; + ret = get_msr(cpu, offset, &msr_cur); + if (ret) { + fprintf(outf, "Can not update msr(0x%x)\n", offset); + continue; + } + + msr_last = per_cpu_msr_sum[cpu].entries[i].last; + per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; + + DELTA_WRAP32(msr_cur, msr_last); + per_cpu_msr_sum[cpu].entries[i].sum += msr_last; + } + return 0; +} + +static void +msr_record_handler(union sigval v) +{ + for_all_cpus(update_msr_sum, EVEN_COUNTERS); +} + +void msr_sum_record(void) +{ + struct itimerspec its; + struct sigevent sev; + + per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); + if (!per_cpu_msr_sum) { + fprintf(outf, "Can not allocate memory for long time MSR.\n"); + return; + } + /* + * Signal handler might be restricted, so use thread notifier instead. + */ + memset(&sev, 0, sizeof(struct sigevent)); + sev.sigev_notify = SIGEV_THREAD; + sev.sigev_notify_function = msr_record_handler; + + sev.sigev_value.sival_ptr = &timerid; + if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { + fprintf(outf, "Can not create timer.\n"); + goto release_msr; + } + + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 1; + /* + * A wraparound time has been calculated early. + * Some sources state that the peak power for a + * microprocessor is usually 1.5 times the TDP rating, + * use 2 * TDP for safety. + */ + its.it_interval.tv_sec = rapl_joule_counter_range / 2; + its.it_interval.tv_nsec = 0; + + if (timer_settime(timerid, 0, &its, NULL) == -1) { + fprintf(outf, "Can not set timer.\n"); + goto release_timer; + } + return; + + release_timer: + timer_delete(timerid); + release_msr: + free(per_cpu_msr_sum); +} void turbostat_loop() { -- cgit From 9972d5d84d76982606806b2ce887f70c2f8ba60a Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:32:05 +0800 Subject: tools/power turbostat: Enable accumulate RAPL display Enable the accumulated RAPL display by default. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 38 +++++++++++++++-------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c1759f6c84a8..66c468262020 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1169,14 +1169,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } } - /* - * If measurement interval exceeds minimum RAPL Joule Counter range, - * indicate that results are suspect by printing "**" in fraction place. - */ - if (interval_float < rapl_joule_counter_range) - fmt8 = "%s%.2f"; - else - fmt8 = "%6.0f**"; + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); @@ -2069,39 +2062,39 @@ retry: p->sys_lpi = cpuidle_cur_sys_lpi_us; if (do_rapl & RAPL_PKG) { - if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; - p->energy_pkg = msr & 0xFFFFFFFF; + p->energy_pkg = msr; } if (do_rapl & RAPL_CORES_ENERGY_STATUS) { - if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr)) return -14; - p->energy_cores = msr & 0xFFFFFFFF; + p->energy_cores = msr; } if (do_rapl & RAPL_DRAM) { - if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) return -15; - p->energy_dram = msr & 0xFFFFFFFF; + p->energy_dram = msr; } if (do_rapl & RAPL_GFX) { - if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr)) return -16; - p->energy_gfx = msr & 0xFFFFFFFF; + p->energy_gfx = msr; } if (do_rapl & RAPL_PKG_PERF_STATUS) { - if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr)) return -16; - p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; + p->rapl_pkg_perf_status = msr; } if (do_rapl & RAPL_DRAM_PERF_STATUS) { - if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr)) return -16; - p->rapl_dram_perf_status = msr & 0xFFFFFFFF; + p->rapl_dram_perf_status = msr; } if (do_rapl & RAPL_AMD_F17H) { - if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr)) + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr)) return -13; - p->energy_pkg = msr & 0xFFFFFFFF; + p->energy_pkg = msr; } if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) @@ -6101,6 +6094,7 @@ int main(int argc, char **argv) return 0; } + msr_sum_record(); /* * if any params left, it must be a command to fork */ -- cgit From 8201a0285789fade1c5b031914577e2b27a64f05 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 29 Jun 2020 15:26:57 -0400 Subject: tools/power turbostat: Use sched_getcpu() instead of hardcoded cpu 0 Disabling cpu 0 results in an error turbostat: /sys/devices/system/cpu/cpu0/topology/thread_siblings: open failed: No such file or directory Use sched_getcpu() instead of a hardcoded cpu 0 to get the max cpu number. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 66c468262020..151a70f2311b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2865,12 +2865,19 @@ void re_initialize(void) void set_max_cpu_num(void) { FILE *filep; + int base_cpu; unsigned long dummy; + char pathname[64]; + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(1, "cannot find calling cpu ID"); + sprintf(pathname, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", + base_cpu); + + filep = fopen_or_die(pathname, "r"); topo.max_cpu_num = 0; - filep = fopen_or_die( - "/sys/devices/system/cpu/cpu0/topology/thread_siblings", - "r"); while (fscanf(filep, "%lx,", &dummy) == 1) topo.max_cpu_num += BITMASK_SIZE; fclose(filep); -- cgit From b88cad57d4d32bb5c53cd8e0ce3a1971062142af Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 8 Jul 2020 12:55:30 +0200 Subject: tools/power turbostat: Replace HTTP links with HTTPS ones: TURBOSTAT UTILITY Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a6db83a88e85..f6b7e85b121c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -335,7 +335,7 @@ that they count at TSC rate, which is true on all processors tested to date. .SH REFERENCES Volume 3B: System Programming Guide" -http://www.intel.com/products/processor/manuals/ +https://www.intel.com/products/processor/manuals/ .SH FILES .ta -- cgit From fecb3bc839df64761cc63c9ee9b45c1cad36aee8 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Mon, 10 Aug 2020 10:43:30 -0400 Subject: tools/power turbostat: Fix output formatting for ACPI CST enumeration turbostat formatting is broken with ACPI CST for enumeration. The problem is that the CX_ACPI% is eight characters long which does not work with tab formatting. One simple solution is to remove the underbar from the state name such that C1_ACPI will be displayed as C1ACPI. Signed-off-by: David Arcari Cc: Len Brown Cc: linux-kernel@vger.kernel.org Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 151a70f2311b..56b93e0d9415 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3682,6 +3682,20 @@ int has_config_tdp(unsigned int family, unsigned int model) } } +static void +remove_underbar(char *s) +{ + char *to = s; + + while (*s) { + if (*s != '_') + *to++ = *s; + s++; + } + + *to = 0; +} + static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { @@ -3764,6 +3778,8 @@ dump_sysfs_cstate_config(void) *sp = '\0'; fclose(input); + remove_underbar(name_buf); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); input = fopen(path, "r"); @@ -5830,6 +5846,8 @@ void probe_sysfs(void) *sp = '%'; *(sp + 1) = '\0'; + remove_underbar(name_buf); + fclose(input); sprintf(path, "cpuidle/state%d/time", state); @@ -5857,6 +5875,8 @@ void probe_sysfs(void) *sp = '\0'; fclose(input); + remove_underbar(name_buf); + sprintf(path, "cpuidle/state%d/usage", state); if (is_deferred_skip(name_buf)) -- cgit From e7af1ed3fa4756e8df8270a8635d852a94266061 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Aug 2020 19:06:03 -0400 Subject: tools/power turbostat: Support additional CPU model numbers Initial support for models recently added to intel-family.h. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 56b93e0d9415..4ee4e3067681 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4906,6 +4906,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ICELAKE_NNPI: case INTEL_FAM6_TIGERLAKE_L: case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: + case INTEL_FAM6_LAKEFIELD: + case INTEL_FAM6_ALDERLAKE: return INTEL_FAM6_CANNONLAKE_L; case INTEL_FAM6_ATOM_TREMONT_D: @@ -4915,6 +4918,7 @@ unsigned int intel_model_duplicates(unsigned int model) return INTEL_FAM6_ATOM_TREMONT; case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: return INTEL_FAM6_SKYLAKE_X; } return model; -- cgit From c315a09b1b0f491c27d46e9d05f397023a44fb81 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Aug 2020 19:18:22 -0400 Subject: tools/power turbostat: Skip pc8, pc9, pc10 columns, if they are disabled Like we skip PC3 and PC6 columns when the package C-state limit disables them, skip PC8/PC9/CP10 under analogous conditions. Reported-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4ee4e3067681..72c0b19db36e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5186,9 +5186,12 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_Pkgpc7); } if (has_c8910_msrs(family, model)) { - BIC_PRESENT(BIC_Pkgpc8); - BIC_PRESENT(BIC_Pkgpc9); - BIC_PRESENT(BIC_Pkgpc10); + if (pkg_cstate_limit >= PCL__8) + BIC_PRESENT(BIC_Pkgpc8); + if (pkg_cstate_limit >= PCL__9) + BIC_PRESENT(BIC_Pkgpc9); + if (pkg_cstate_limit >= PCL_10) + BIC_PRESENT(BIC_Pkgpc10); } do_irtl_hsw = has_c8910_msrs(family, model); if (has_skl_msrs(family, model)) { -- cgit From 0936cdfbb527a4fa2559292069ebff2e8cf2c843 Mon Sep 17 00:00:00 2001 From: Ondřej Lysoněk Date: Fri, 27 Mar 2020 08:27:12 +0100 Subject: tools/power x86_energy_perf_policy: Input/output error in a VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've encountered an issue with x86_energy_perf_policy. If I run it on a machine that I'm told is a qemu-kvm virtual machine running inside a privileged container, I get the following error: x86_energy_perf_policy: /dev/cpu/0/msr offset 0x1ad read failed: Input/output error I get the same error in a Digital Ocean droplet, so that might be a similar environment. I created the following patch which is intended to give a more user-friendly message. It's based on a patch for turbostat from Prarit Bhargava that was posted some time ago. The patch is "[v2] turbostat: Running on virtual machine is not supported" [1]. Given my limited knowledge of the topic, I can't say with confidence that this is the right solution, though (that's why this is not an official patch submission). Also, I'm not sure what the convention with exit codes is in this tool. Also, instead of the error message, perhaps the tool should just not print anything in this case, which is how it behaves in a "regular" VM? [1] https://patchwork.kernel.org/patch/9868587/ Signed-off-by: Ondřej Lysoněk Signed-off-by: Len Brown --- .../x86_energy_perf_policy.c | 67 +++++++++++++++++----- 1 file changed, 54 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 3fe1eed900d4..ff6c6661f075 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -622,6 +622,57 @@ void cmdline(int argc, char **argv) } } +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, "r"); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} + +void err_on_hypervisor(void) +{ + FILE *cpuinfo; + char *flags, *hypervisor; + char *buffer; + + /* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */ + cpuinfo = fopen_or_die("/proc/cpuinfo", "ro"); + + buffer = malloc(4096); + if (!buffer) { + fclose(cpuinfo); + err(-ENOMEM, "buffer malloc fail"); + } + + if (!fread(buffer, 1024, 1, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + + flags = strstr(buffer, "flags"); + rewind(cpuinfo); + fseek(cpuinfo, flags - buffer, SEEK_SET); + if (!fgets(buffer, 4096, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + fclose(cpuinfo); + + hypervisor = strstr(buffer, "hypervisor"); + + free(buffer); + + if (hypervisor) + err(-1, + "not supported on this virtual machine"); +} int get_msr(int cpu, int offset, unsigned long long *msr) { @@ -635,8 +686,10 @@ int get_msr(int cpu, int offset, unsigned long long *msr) err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); retval = pread(fd, msr, sizeof(*msr), offset); - if (retval != sizeof(*msr)) + if (retval != sizeof(*msr)) { + err_on_hypervisor(); err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + } if (debug > 1) fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr); @@ -1086,18 +1139,6 @@ int update_cpu_msrs(int cpu) return 0; } -/* - * Open a file, and exit on failure - */ -FILE *fopen_or_die(const char *path, const char *mode) -{ - FILE *filep = fopen(path, "r"); - - if (!filep) - err(1, "%s: open failed", path); - return filep; -} - unsigned int get_pkg_num(int cpu) { FILE *fp; -- cgit From b4b9156953fea108a9540c262e48eafeeff99ab0 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Wed, 22 Apr 2020 15:02:07 -0700 Subject: tools/power turbostat: Add a new GFXAMHz column that exposes gt_act_freq_mhz. The column already present called GFXMHz reads from gt_cur_freq_mhz, which represents the GT frequency that was requested, but power management might not be able to do that. So the new column will display what the actual frequency GT is running at. Signed-off-by: Rafael Antognolli Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 72c0b19db36e..7a6e91aedf0f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -79,6 +79,7 @@ unsigned long long gfx_cur_rc6_ms; unsigned long long cpuidle_cur_cpu_lpi_us; unsigned long long cpuidle_cur_sys_lpi_us; unsigned int gfx_cur_mhz; +unsigned int gfx_act_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; double rapl_power_units, rapl_time_units; @@ -210,6 +211,7 @@ struct pkg_data { unsigned long long pkg_both_core_gfxe_c0; long long gfx_rc6_ms; unsigned int gfx_mhz; + unsigned int gfx_act_mhz; unsigned int package_id; unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ @@ -558,6 +560,7 @@ struct msr_counter bic[] = { { 0x0, "APIC" }, { 0x0, "X2APIC" }, { 0x0, "Die" }, + { 0x0, "GFXAMHz" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -612,6 +615,7 @@ struct msr_counter bic[] = { #define BIC_APIC (1ULL << 48) #define BIC_X2APIC (1ULL << 49) #define BIC_Die (1ULL << 50) +#define BIC_GFXACTMHz (1ULL << 51) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -831,6 +835,9 @@ void print_header(char *delim) if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); if (DO_BIC(BIC_Any_c0)) @@ -1198,6 +1205,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); + /* GFXACTMHz */ + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); @@ -1349,6 +1360,7 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; old->gfx_mhz = new->gfx_mhz; + old->gfx_act_mhz = new->gfx_act_mhz; old->energy_pkg = new->energy_pkg - old->energy_pkg; old->energy_cores = new->energy_cores - old->energy_cores; @@ -1565,6 +1577,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; + p->gfx_act_mhz = 0; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) t->counter[i] = 0; @@ -1660,6 +1673,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.gfx_rc6_ms = p->gfx_rc6_ms; average.packages.gfx_mhz = p->gfx_mhz; + average.packages.gfx_act_mhz = p->gfx_act_mhz; average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); @@ -2108,6 +2122,9 @@ retry: if (DO_BIC(BIC_GFXMHz)) p->gfx_mhz = gfx_cur_mhz; + if (DO_BIC(BIC_GFXACTMHz)) + p->gfx_act_mhz = gfx_act_mhz; + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &p->counter[i])) return -10; @@ -3018,6 +3035,33 @@ int snapshot_gfx_mhz(void) return 0; } +/* + * snapshot_gfx_cur_mhz() + * + * record snapshot of + * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_act_mhz(void) +{ + static FILE *fp; + int retval; + + if (fp == NULL) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); + else { + rewind(fp); + fflush(fp); + } + + retval = fscanf(fp, "%d", &gfx_act_mhz); + if (retval != 1) + err(1, "GFX ACT MHz"); + + return 0; +} + /* * snapshot_cpu_lpi() * @@ -3083,6 +3127,9 @@ int snapshot_proc_sysfs_files(void) if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); + if (DO_BIC(BIC_GFXACTMHz)) + snapshot_gfx_act_mhz(); + if (DO_BIC(BIC_CPU_LPI)) snapshot_cpu_lpi_us(); @@ -5236,6 +5283,9 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXACTMHz); + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) BIC_PRESENT(BIC_CPU_LPI); else -- cgit From 20de0dab238849414d33c81bc96e2db68cc61467 Mon Sep 17 00:00:00 2001 From: Antti Laakso Date: Mon, 17 Aug 2020 18:03:48 +0300 Subject: tools/power turbostat: Remove empty columns for Jacobsville Jacobsville doesn't have Package C2 and C6. Also Core and DRAM RAPL are not available. Adjust output accordingly. Signed-off-by: Antti Laakso Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7a6e91aedf0f..629b809075c1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2286,6 +2286,7 @@ int has_turbo_ratio_group_limits(int family, int model) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_FAM6_ATOM_TREMONT_D: return 1; } return 0; @@ -3534,6 +3535,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ pkg_cstate_limits = glm_pkg_cstate_limits; break; default: @@ -3616,6 +3618,17 @@ int is_ehl(unsigned int family, unsigned int model) } return 0; } +int is_jvl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_TREMONT_D: + return 1; + } + return 0; +} int has_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -4227,6 +4240,14 @@ void rapl_probe_intel(unsigned int family, unsigned int model) BIC_PRESENT(BIC_GFXWatt); } break; + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); + break; case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; @@ -4629,6 +4650,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ return 1; } return 0; @@ -4958,9 +4980,6 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ALDERLAKE: return INTEL_FAM6_CANNONLAKE_L; - case INTEL_FAM6_ATOM_TREMONT_D: - return INTEL_FAM6_ATOM_GOLDMONT_D; - case INTEL_FAM6_ATOM_TREMONT_L: return INTEL_FAM6_ATOM_TREMONT; @@ -5214,6 +5233,14 @@ void process_cpuid() BIC_PRESENT(BIC_Mod_c6); use_c1_residency_msr = 1; } + if (is_jvl(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } if (is_dnv(family, model)) { BIC_PRESENT(BIC_CPU_c1); BIC_NOT_PRESENT(BIC_CPU_c3); -- cgit From 33eb82251af9be47a625ca1578f44e596a3a0ca9 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 17 Aug 2020 17:42:15 -0500 Subject: tools/power turbostat: Support AMD Family 19h Family 19h processors have the same RAPL (Running average power limit) hardware register interface as Family 17h processors. Change the family checks to succeed for Family 17h and above to enable core and package energy measurement on Family 19h machines. Also update the TDP to the largest found at the bottom of the page at amd.com->processors->servers->epyc->2nd-gen-epyc, i.e., the EPYC 7H12. Signed-off-by: Kim Phillips Cc: Len Brown Cc: Len Brown Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 629b809075c1..0869f791ed14 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4162,13 +4162,8 @@ double get_tdp_intel(unsigned int model) double get_tdp_amd(unsigned int family) { - switch (family) { - case 0x17: - case 0x18: - default: - /* This is the max stock TDP of HEDT/Server Fam17h chips */ - return 250.0; - } + /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ + return 280.0; } /* @@ -4358,27 +4353,20 @@ void rapl_probe_amd(unsigned int family, unsigned int model) if (max_extended_level >= 0x80000007) { __cpuid(0x80000007, eax, ebx, ecx, edx); - /* RAPL (Fam 17h) */ + /* RAPL (Fam 17h+) */ has_rapl = edx & (1 << 14); } - if (!has_rapl) + if (!has_rapl || family < 0x17) return; - switch (family) { - case 0x17: /* Zen, Zen+ */ - case 0x18: /* Hygon Dhyana */ - do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - } - break; - default: - return; + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); } if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) -- cgit From 4be61e6b769fc3f97b58870aa4258e27968f07e1 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sun, 23 Aug 2020 23:27:02 +0300 Subject: tools/power turbostat: Build with _FILE_OFFSET_BITS=64 For compatibility reasons, Glibc off_t is a 32-bit type on 32-bit x86 unless _FILE_OFFSET_BITS=64 is defined. Add this define, as otherwise reading MSRs with index 0x80000000 and above attempts a pread with a negative offset, which fails. Signed-off-by: Alexander Monakov Tested-by: Liwei Song Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index d08765531bcb..f3e3c94ab9bd 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -12,6 +12,7 @@ turbostat : turbostat.c override CFLAGS += -O2 -Wall -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -D_FILE_OFFSET_BITS=64 override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c -- cgit From 6ff7cb371c4bea3dba03a56d774da925e78a5087 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 29 Sep 2020 17:28:42 -0400 Subject: tools/power turbostat: adjust for temperature offset cpu1: MSR_IA32_TEMPERATURE_TARGET: 0x05640000 (95 C) (100 default - 5 offset) Account for the new "offset" field in MSR_TEMPERATURE_TARGET. While this field is usually zero, ignoring it results in over-stating the current temperature, both per-core and per-package. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 62 ++++++++++++++++------------------- 1 file changed, 29 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0869f791ed14..4122468fb73b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4785,12 +4785,33 @@ double discover_bclk(unsigned int family, unsigned int model) * below this value, including the Digital Thermal Sensor (DTS), * Package Thermal Management Sensor (PTM), and thermal event thresholds. */ -int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int read_tcc_activation_temp() { unsigned long long msr; - unsigned int target_c_local; - int cpu; + unsigned int tcc, target_c, offset_c; + + /* Temperature Target MSR is Nehalem and newer only */ + if (!do_nhm_platform_info) + return 0; + + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) + return 0; + target_c = (msr >> 16) & 0xFF; + + offset_c = (msr >> 24) & 0xF; + + tcc = target_c - offset_c; + + if (!quiet) + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", + base_cpu, msr, tcc, target_c, offset_c); + + return tcc; +} + +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ /* tcc_activation_temp is used only for dts or ptm */ if (!(do_dts || do_ptm)) return 0; @@ -4799,43 +4820,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - cpu = t->cpu_id; - if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); - return -1; - } - if (tcc_activation_temp_override != 0) { tcc_activation_temp = tcc_activation_temp_override; - fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", - cpu, tcc_activation_temp); + fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp); return 0; } - /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nhm_platform_info) - goto guess; - - if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) - goto guess; - - target_c_local = (msr >> 16) & 0xFF; - - if (!quiet) - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", - cpu, msr, target_c_local); - - if (!target_c_local) - goto guess; - - tcc_activation_temp = target_c_local; - - return 0; + tcc_activation_temp = read_tcc_activation_temp(); + if (tcc_activation_temp) + return 0; -guess: tcc_activation_temp = TJMAX_DEFAULT; - fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", - cpu, tcc_activation_temp); + fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp); return 0; } -- cgit From 3d7772ea5602b88c7c7f0a50d512171a2eed6659 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 30 Sep 2020 20:58:15 -0400 Subject: tools/power turbostat: harden against cpu hotplug turbostat tends to get confused when CPUs are added and removed while it is running. There are races, such as checking the current cpu, and then reading a sysfs file that depends on that cpu number. Close the two issues that seem to come up the most. First, there is an infinite reset loop detector -- change that to allow more resets before giving up. Secondly, one of those file reads didn't really need to exit the program on failure... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4122468fb73b..74d2fc6fc78a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1894,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int i; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu); return -1; } @@ -2764,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu) sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); - filep = fopen_or_die(path, "r"); + filep = fopen(path, "r"); + + if (!filep) { + warnx("%s: open failed", path); + return -1; + } do { offset -= BITMASK_SIZE; if (fscanf(filep, "%lx%c", &map, &character) != 2) @@ -2877,7 +2882,7 @@ void re_initialize(void) { free_all_buffers(); setup_all_buffers(); - printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); + fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); } void set_max_cpu_num(void) @@ -3331,7 +3336,7 @@ restart: if (retval < -1) { exit(retval); } else if (retval == -1) { - if (restarted > 1) { + if (restarted > 10) { exit(retval); } re_initialize(); @@ -3926,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); return -1; } @@ -3970,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4058,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4439,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4511,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); return -1; } -- cgit From 472547778de24e2764ab325268dd5b77e6923939 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 22 Oct 2020 13:27:38 -0700 Subject: selftest/bpf: Fix profiler test using CO-RE relocation for enums Instead of hard-coding invalid pids_cgrp_id, use Kconfig to detect the presence of that enum value and CO-RE to capture its actual value in the hosts's kernel. Fixes: 03d4d13fab3f ("selftests/bpf: Add profiler test") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Tested-by: Song Liu Link: https://lore.kernel.org/bpf/20201022202739.3667367-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/profiler.inc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 00578311a423..30982a7e4d0f 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node) } } -int pids_cgrp_id = 1; +extern bool CONFIG_CGROUP_PIDS __kconfig __weak; +enum cgroup_subsys_id___local { + pids_cgrp_id___local = 123, /* value doesn't matter */ +}; static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, struct task_struct* task, @@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); - if (ENABLE_CGROUP_V1_RESOLVER) { + if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { + int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, + pids_cgrp_id___local); #ifdef UNROLL #pragma unroll #endif @@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, cgroups, subsys[i]); if (subsys != NULL) { int subsys_id = BPF_CORE_READ(subsys, ss, id); - if (subsys_id == pids_cgrp_id) { + if (subsys_id == cgrp_id) { proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); break; -- cgit From 3023d8ff3fc60e5d32dc1d05f99ad6ffa12b0033 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Oct 2020 20:04:55 -0700 Subject: kunit: Fix kunit.py --raw_output option Due to the raw_output() function on kunit_parser.py actually being a generator, it only runs if something reads the lines it returns. Since we no-longer do that (parsing doesn't actually happen if raw_output is enabled), it was not printing anything. Fixes: 45ba7a893ad8 ("kunit: kunit_tool: Separate out config/build/exec/parse") Signed-off-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 8019e3dd4c32..744ee9cb0073 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -66,7 +66,6 @@ def isolate_kunit_output(kernel_output): def raw_output(kernel_output): for line in kernel_output: print(line) - yield line DIVIDER = '=' * 60 -- cgit From 3fc48259d5250f7a3ee021ad0492b604c428c564 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Oct 2020 19:28:17 +0300 Subject: kunit: Don't fail test suites if one of them is empty Empty test suite is okay test suite. Don't fail the rest of the test suites if one of them is empty. Fixes: 6ebf5866f2e8 ("kunit: tool: add Python wrappers for running KUnit tests") Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 744ee9cb0073..84a1af2581f5 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -241,7 +241,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: return None test_suite.name = name expected_test_case_num = parse_subtest_plan(lines) - if not expected_test_case_num: + if expected_test_case_num is None: return None while expected_test_case_num > 0: test_case = parse_test_case(lines) -- cgit From 0d0d245104a42e593adcf11396017a6420c08ba8 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Wed, 21 Oct 2020 13:39:14 -0700 Subject: kunit: tools: fix kunit_tool tests for parsing test plans Some tests logs for kunit_tool tests are missing their test plans causing their tests to fail; fix this by adding the test plans. Fixes: 45dcbb6f5ef7 ("kunit: test: add test plan to KUnit TAP format") Signed-off-by: Brendan Higgins Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_tool_test.py | 32 +++++++++++++++++----- .../kunit/test_data/test_config_printk_time.log | 3 +- .../test_data/test_interrupted_tap_output.log | 3 +- .../test_data/test_kernel_panic_interrupt.log | 3 +- .../kunit/test_data/test_multiple_prefixes.log | 3 +- .../kunit/test_data/test_pound_no_prefix.log | 3 +- tools/testing/kunit/test_data/test_pound_sign.log | 1 + 7 files changed, 36 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 99c3c5671ea4..0b60855fb819 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -179,7 +179,7 @@ class KUnitParserTest(unittest.TestCase): print_mock = mock.patch('builtins.print').start() result = kunit_parser.parse_run_tests( kunit_parser.isolate_kunit_output(file.readlines())) - print_mock.assert_any_call(StrContains("no kunit output detected")) + print_mock.assert_any_call(StrContains('no tests run!')) print_mock.stop() file.close() @@ -198,39 +198,57 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_config_printk_time.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_ignores_multiple_prefixes(self): prefix_log = get_absolute_path( 'test_data/test_multiple_prefixes.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = get_absolute_path( 'test_data/test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_poundsign(self): pound_log = get_absolute_path('test_data/test_pound_sign.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_kernel_panic_end(self): panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log') with open(panic_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.TEST_CRASHED, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_pound_no_prefix(self): pound_log = get_absolute_path('test_data/test_pound_no_prefix.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) class KUnitJsonTest(unittest.TestCase): diff --git a/tools/testing/kunit/test_data/test_config_printk_time.log b/tools/testing/kunit/test_data/test_config_printk_time.log index c02ca773946d..6bdb57f76eac 100644 --- a/tools/testing/kunit/test_data/test_config_printk_time.log +++ b/tools/testing/kunit/test_data/test_config_printk_time.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -28,4 +29,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_interrupted_tap_output.log b/tools/testing/kunit/test_data/test_interrupted_tap_output.log index 5c73fb3a1c6f..1fb677728abe 100644 --- a/tools/testing/kunit/test_data/test_interrupted_tap_output.log +++ b/tools/testing/kunit/test_data/test_interrupted_tap_output.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -34,4 +35,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log index c045eee75f27..a014ffe9725e 100644 --- a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log +++ b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -22,4 +23,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_multiple_prefixes.log b/tools/testing/kunit/test_data/test_multiple_prefixes.log index bc48407dcc36..0ad78481a0b4 100644 --- a/tools/testing/kunit/test_data/test_multiple_prefixes.log +++ b/tools/testing/kunit/test_data/test_multiple_prefixes.log @@ -1,6 +1,7 @@ [ 0.060000][ T1] printk: console [mc-1] enabled [ 0.060000][ T1] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000][ T1] TAP version 14 +[ 0.060000][ T1] 1..3 [ 0.060000][ T1] # Subtest: kunit-resource-test [ 0.060000][ T1] 1..5 [ 0.060000][ T1] ok 1 - kunit_resource_test_init_resources @@ -28,4 +29,4 @@ [ 0.060000][ T1] Stack: [ 0.060000][ T1] 602086f8 601bc260 705c0000 705c0000 [ 0.060000][ T1] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file +[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_pound_no_prefix.log b/tools/testing/kunit/test_data/test_pound_no_prefix.log index 2ceb360be7d5..dc4cf09a96d0 100644 --- a/tools/testing/kunit/test_data/test_pound_no_prefix.log +++ b/tools/testing/kunit/test_data/test_pound_no_prefix.log @@ -1,6 +1,7 @@ printk: console [mc-1] enabled random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 TAP version 14 + 1..3 # Subtest: kunit-resource-test 1..5 ok 1 - kunit_resource_test_init_resources @@ -30,4 +31,4 @@ Stack: 602086f8 601bc260 705c0000 705c0000 602086f8 6005fcec 705c0000 6002c6ab - 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file + 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log index 28ffa5ba03bf..3f358e3a7ba0 100644 --- a/tools/testing/kunit/test_data/test_pound_sign.log +++ b/tools/testing/kunit/test_data/test_pound_sign.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources -- cgit From dc6bf4da825aa0301a46f55fec7c0bb706af2aad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 26 Oct 2020 16:20:32 -0400 Subject: selftests/ftrace: Use $FUNCTION_FORK to reference kernel fork function Commit cad6967ac108 ("fork: introduce kernel_clone()") replaced "_do_fork()" with "kernel_clone()". The ftrace selftests reference the fork function in several of the tests. The rename will make the tests break, but if those names are changed in the tests, they would then break on older kernels. The same set of tests should pass older kernels if they have previously passed. Obviously, a new test may not work on older kernels if the test was added due to a bug or a new feature. The setup of ftracetest will now create a $FUNCTION_FORK bash variable that will contain "_do_fork" for older kernels and "kernel_clone" for newer ones. It figures out the proper name by examining /proc/kallsyms. Note, available_filter_functions could also be used, but because some tests should be able to pass without function tracing enabled, it could not be used. Fixes: eea11285dab3 ("tracing: switch to kernel_clone()") Signed-off-by: Steven Rostedt (VMware) Acked-by: Masami Hiramatsu Signed-off-by: Shuah Khan --- .../selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc | 2 +- .../ftrace/test.d/dynevent/clear_select_events.tc | 2 +- .../ftrace/test.d/dynevent/generic_clear_event.tc | 2 +- .../ftrace/test.d/ftrace/func-filter-notrace-pid.tc | 2 +- .../selftests/ftrace/test.d/ftrace/func-filter-pid.tc | 2 +- .../ftrace/test.d/ftrace/func-filter-stacktrace.tc | 4 ++-- tools/testing/selftests/ftrace/test.d/functions | 7 +++++++ .../selftests/ftrace/test.d/kprobe/add_and_remove.tc | 2 +- tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc | 2 +- .../testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc | 4 ++-- .../selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_args_string.tc | 4 ++-- .../selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc | 10 +++++----- .../selftests/ftrace/test.d/kprobe/kprobe_args_type.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc | 14 +++++++------- .../selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc | 2 +- .../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 12 ++++++------ .../selftests/ftrace/test.d/kprobe/kretprobe_args.tc | 4 ++-- tools/testing/selftests/ftrace/test.d/kprobe/profile.tc | 2 +- 19 files changed, 44 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 3bcd4c3624ee..b4da41d126d5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK echo "p:myevent1 $PLACE" >> dynamic_events echo "r:myevent2 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index 438961971b7e..3a0e2885fff5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index a8603bd23e0d..d3e138e8377f 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index acb17ce543d2..80541964b927 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_notrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 9f0a9687c773..2f7211254529 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 98305d76bd04..191d116b7883 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -4,9 +4,9 @@ # requires: set_ftrace_filter # flags: instance -echo kernel_clone:stacktrace >> set_ftrace_filter +echo $FUNCTION_FORK:stacktrace >> set_ftrace_filter -grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter +grep -q "$FUNCTION_FORK:stacktrace:unlimited" set_ftrace_filter (echo "forked"; sleep 1) diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index c5dec55b7d95..a6fac927ee82 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -133,6 +133,13 @@ yield() { ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1 } +# The fork function in the kernel was renamed from "_do_fork" to +# "kernel_fork". As older tests should still work with older kernels +# as well as newer kernels, check which version of fork is used on this +# kernel so that the tests can use the fork function for the running kernel. +FUNCTION_FORK=`(if grep '\bkernel_clone\b' /proc/kallsyms > /dev/null; then + echo kernel_clone; else echo '_do_fork'; fi)` + # Since probe event command may include backslash, explicitly use printf "%s" # to NOT interpret it. ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index 9737cd0578a7..2428a3ed78c9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - adding and removing # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index f9a40af76888..010a8b1d6c1d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - busy event check # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index eb543d3cfe5f..a96a1dc7014f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -3,13 +3,13 @@ # description: Kprobe dynamic event with arguments # requires: kprobe_events -echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events +echo "p:testprobe $FUNCTION_FORK \$stack \$stack0 +0(\$stack)" > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable ( echo "forked") -grep testprobe trace | grep 'kernel_clone' | \ +grep testprobe trace | grep "$FUNCTION_FORK" | \ grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' echo 0 > events/kprobes/testprobe/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 4e5b63be51c9..a053ee2e7d77 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -5,7 +5,7 @@ grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old -echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events +echo "p:testprobe $FUNCTION_FORK comm=\$comm " > kprobe_events grep testprobe kprobe_events | grep -q 'comm=$comm' test -d events/kprobes/testprobe diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a1d70588ab21..84285a6f60b0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -30,13 +30,13 @@ esac : "Test get argument (1)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index bd25dd0ba0d0..717130ed4feb 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then fi : "Test get basic types symbol argument" -echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events -echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +echo "p:testprobe_u $FUNCTION_FORK arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s $FUNCTION_FORK arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events if grep -q "x8/16/32/64" README; then - echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events + echo "p:testprobe_x $FUNCTION_FORK arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events fi -echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo "p:testprobe_bf $FUNCTION_FORK arg1=@linux_proc_banner:b8@4/32" >> kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable @@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace grep "testprobe_bf:.* arg1=.*" trace : "Test get string symbol argument" -echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events +echo "p:testprobe_str $FUNCTION_FORK arg1=@linux_proc_banner:string" > kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 91fcce1c241c..25b7708eb559 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize - echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" + echo "p:testprobe $FUNCTION_FORK \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" } check_types() { # s-type u-type x-type bf-type width diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 0d179094191f..5556292601a4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,29 +5,29 @@ # prepare echo nop > current_tracer -echo kernel_clone > set_ftrace_filter -echo 'p:testprobe kernel_clone' > kprobe_events +echo $FUNCTION_FORK > set_ftrace_filter +echo "p:testprobe $FUNCTION_FORK" > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -35,11 +35,11 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 45d90b6c763d..f0d5b7777ed7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "Create/append/":README # Choose 2 symbols for target -SYM1=kernel_clone +SYM1=$FUNCTION_FORK SYM2=do_exit EVENT_NAME=kprobes/testevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 1b5550ef8a9b..fa928b431555 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -86,15 +86,15 @@ esac # multiprobe errors if grep -q "Create/append/" README && grep -q "imm-value" README; then -echo 'p:kprobes/testevent kernel_clone' > kprobe_events +echo "p:kprobes/testevent $FUNCTION_FORK" > kprobe_events check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE # Explicitly use printf "%s" to not interpret \1 -printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events -check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE +printf "%s" "p:kprobes/testevent $FUNCTION_FORK abcd=\\1" > kprobe_events +check_error "p:kprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE +check_error "^p:kprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE fi # %return suffix errors diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 7ae492c204a4..197cc2afd404 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,14 +4,14 @@ # requires: kprobe_events # Add new kretprobe event -echo 'r:testprobe2 kernel_clone $retval' > kprobe_events +echo "r:testprobe2 $FUNCTION_FORK \$retval" > kprobe_events grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable ( echo "forked") -cat trace | grep testprobe2 | grep -q '<- kernel_clone' +cat trace | grep testprobe2 | grep -q "<- $FUNCTION_FORK" echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index c4093fc1a773..98166fa3eb91 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -4,7 +4,7 @@ # requires: kprobe_events ! grep -q 'myevent' kprobe_profile -echo p:myevent kernel_clone > kprobe_events +echo "p:myevent $FUNCTION_FORK" > kprobe_events grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile echo 1 > events/kprobes/myevent/enable ( echo "forked" ) -- cgit From e3e40312567087fbe6880f316cb2b0e1f3d8a82c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Oct 2020 14:25:01 +0100 Subject: selftests/ftrace: check for do_sys_openat2 in user-memory test More recent libc implementations are now using openat/openat2 system calls so also add do_sys_openat2 to the tracing so that the test passes on these systems because do_sys_open may not be called. Thanks to Masami Hiramatsu for the help on getting this fix to work correctly. Signed-off-by: Colin Ian King Acked-by: Masami Hiramatsu Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index a30a9c07290d..d25d01a19778 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]' || exit_unsupported :;: "user-memory access syntax and ustring working on user memory";: echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \ > kprobe_events +echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \ + >> kprobe_events grep myevent kprobe_events | \ grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string' echo 1 > events/kprobes/myevent/enable +echo 1 > events/kprobes/myevent2/enable echo > /dev/null echo 0 > events/kprobes/myevent/enable +echo 0 > events/kprobes/myevent2/enable grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"' -- cgit From f825d3f7ed9305e7dd0a3e0a74673a4257d0cc53 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:21 +0300 Subject: selftests: filter kselftest headers from command in lib.mk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1056d3d2c97e ("selftests: enforce local header dependency in lib.mk") added header dependency to the rule, but as the rule uses $^, the headers are added to the compiler command line. This can cause unexpected precompiled header files being generated when compilation fails: $ echo { >> openat2_test.c $ make gcc -Wall -O2 -g -fsanitize=address -fsanitize=undefined openat2_test.c tools/testing/selftests/kselftest_harness.h tools/testing/selftests/kselftest.h helpers.c -o tools/testing/selftests/openat2/openat2_test openat2_test.c:313:1: error: expected identifier or ‘(’ before ‘{’ token 313 | { | ^ make: *** [../lib.mk:140: tools/testing/selftests/openat2/openat2_test] Error 1 $ file openat2_test* openat2_test: GCC precompiled header (version 014) for C openat2_test.c: C source, ASCII text Fix it by filtering out the headers, so that we'll only pass the actual *.c files in the compiler command line. Fixes: 1056d3d2c97e ("selftests: enforce local header dependency in lib.mk") Signed-off-by: Tommi Rantala Acked-by: Kees Cook Reviewed-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 30848ca36555..a5ce26d548e4 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -136,7 +136,7 @@ endif ifeq ($(OVERRIDE_TARGETS),) LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h $(OUTPUT)/%:%.c $(LOCAL_HDRS) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ $(OUTPUT)/%.o:%.S $(COMPILE.S) $^ -o $@ -- cgit From 1948172fdba5ad643529ddcd00a601c0caa913ed Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:22 +0300 Subject: selftests: pidfd: fix compilation errors due to wait.h Drop unneeded header inclusion to fix pidfd compilation errors seen in Fedora 32: In file included from pidfd_open_test.c:9: ../../../../usr/include/linux/wait.h:17:16: error: expected identifier before numeric constant 17 | #define P_ALL 0 | ^ Signed-off-by: Tommi Rantala Reviewed-by: Kees Cook Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_open_test.c | 1 - tools/testing/selftests/pidfd/pidfd_poll_test.c | 1 - 2 files changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index b9fe75fc3e51..8a59438ccc78 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 4b115444dfe9..610811275357 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #include #include -#include #include #include #include -- cgit From ef7086347c82c53a6c5238bd2cf31379f6acadde Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:24 +0300 Subject: selftests/harness: prettify SKIP message whitespace again Commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") replaced XFAIL with SKIP in the output. Add one more space to make the output aligned and pretty again. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala Acked-by: Kees Cook Reviewed-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index f19804df244c..d2cba10acc6d 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -126,7 +126,7 @@ snprintf(_metadata->results->reason, \ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \ if (TH_LOG_ENABLED) { \ - fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ + fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ _metadata->results->reason); \ } \ _metadata->passed = 1; \ -- cgit From 0b18fed98bf96ba5ac14ab7c43c8a3364cb0daf8 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:25 +0300 Subject: selftests: pidfd: use ksft_test_result_skip() when skipping test There's planned tests != run tests in pidfd_test when some test is skipped: $ ./pidfd_test TAP version 13 1..8 [...] # pidfd_send_signal signal recycled pid test: Skipping test # Planned tests != run tests (8 != 7) # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Fix by using ksft_test_result_skip(): $ ./pidfd_test TAP version 13 1..8 [...] ok 8 # SKIP pidfd_send_signal signal recycled pid test: Unsharing pid namespace not permitted # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:1 error:0 Signed-off-by: Tommi Rantala Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c585aaa2acd8..529eb700ac26 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void) ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n", test_name, PID_RECYCLE); case PIDFD_SKIP: - ksft_print_msg("%s test: Skipping test\n", test_name); + ksft_test_result_skip("%s test: Skipping test\n", test_name); ret = 0; break; case PIDFD_XFAIL: -- cgit From b5ec9fe5be5e02e7db9e79aaa9a1ea7a3419d0b5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:26 +0300 Subject: selftests: pidfd: skip test on kcmp() ENOSYS Skip test if kcmp() is not available, for example if kernel is compiled without CONFIG_CHECKPOINT_RESTORE=y. Signed-off-by: Tommi Rantala Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_getfd_test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 7758c98be015..0930e2411dfb 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -204,7 +204,10 @@ TEST_F(child, fetch_fd) fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); ASSERT_GE(fd, 0); - EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd)); + ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd); + if (ret < 0 && errno == ENOSYS) + SKIP(return, "kcmp() syscall not supported"); + EXPECT_EQ(ret, 0); ret = fcntl(fd, F_GETFD); ASSERT_GE(ret, 0); -- cgit From 90da74af349e8a476e1d357da735b8f35b56d4e6 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:27 +0300 Subject: selftests: pidfd: add CONFIG_CHECKPOINT_RESTORE=y to config kcmp syscall is used in pidfd_getfd_test.c, so add CONFIG_CHECKPOINT_RESTORE=y to config to ensure kcmp is available. Signed-off-by: Tommi Rantala Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index bb11de90c0c9..f6f2965e17af 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,3 +4,4 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y -- cgit From 7b9621d4593199aa0268e56081fe730b71c053e6 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:28 +0300 Subject: selftests: pidfd: drop needless linux/kcmp.h inclusion in pidfd_setns_test.c kcmp is not used in pidfd_setns_test.c, so do not include Signed-off-by: Tommi Rantala Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_setns_test.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 1f085b922c6e..6e2f2cd400ca 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "pidfd.h" #include "../clone3/clone3_selftests.h" -- cgit From d2692abd6fa9866fda3052efa5cbd116b9fec56b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:33 +0100 Subject: selftests: kselftest_harness.h: fix kernel-doc markups The kernel-doc markups there is violating the expected syntax, causing it to not parse the name of the markup identifier properly, preventing it to check if the kernel-doc matches the #define below each markup. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/697640045663f1366beb15e76e78b420dac5f5a2.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- tools/testing/selftests/kselftest_harness.h | 44 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index f19804df244c..d747d6b1da1a 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -432,7 +432,7 @@ */ /** - * ASSERT_EQ(expected, seen) + * ASSERT_EQ() * * @expected: expected value * @seen: measured value @@ -443,7 +443,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 1) /** - * ASSERT_NE(expected, seen) + * ASSERT_NE() * * @expected: expected value * @seen: measured value @@ -454,7 +454,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 1) /** - * ASSERT_LT(expected, seen) + * ASSERT_LT() * * @expected: expected value * @seen: measured value @@ -465,7 +465,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 1) /** - * ASSERT_LE(expected, seen) + * ASSERT_LE() * * @expected: expected value * @seen: measured value @@ -476,7 +476,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 1) /** - * ASSERT_GT(expected, seen) + * ASSERT_GT() * * @expected: expected value * @seen: measured value @@ -487,7 +487,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 1) /** - * ASSERT_GE(expected, seen) + * ASSERT_GE() * * @expected: expected value * @seen: measured value @@ -498,7 +498,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 1) /** - * ASSERT_NULL(seen) + * ASSERT_NULL() * * @seen: measured value * @@ -508,7 +508,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 1) /** - * ASSERT_TRUE(seen) + * ASSERT_TRUE() * * @seen: measured value * @@ -518,7 +518,7 @@ __EXPECT(0, "0", seen, #seen, !=, 1) /** - * ASSERT_FALSE(seen) + * ASSERT_FALSE() * * @seen: measured value * @@ -528,7 +528,7 @@ __EXPECT(0, "0", seen, #seen, ==, 1) /** - * ASSERT_STREQ(expected, seen) + * ASSERT_STREQ() * * @expected: expected value * @seen: measured value @@ -539,7 +539,7 @@ __EXPECT_STR(expected, seen, ==, 1) /** - * ASSERT_STRNE(expected, seen) + * ASSERT_STRNE() * * @expected: expected value * @seen: measured value @@ -550,7 +550,7 @@ __EXPECT_STR(expected, seen, !=, 1) /** - * EXPECT_EQ(expected, seen) + * EXPECT_EQ() * * @expected: expected value * @seen: measured value @@ -561,7 +561,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 0) /** - * EXPECT_NE(expected, seen) + * EXPECT_NE() * * @expected: expected value * @seen: measured value @@ -572,7 +572,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 0) /** - * EXPECT_LT(expected, seen) + * EXPECT_LT() * * @expected: expected value * @seen: measured value @@ -583,7 +583,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 0) /** - * EXPECT_LE(expected, seen) + * EXPECT_LE() * * @expected: expected value * @seen: measured value @@ -594,7 +594,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 0) /** - * EXPECT_GT(expected, seen) + * EXPECT_GT() * * @expected: expected value * @seen: measured value @@ -605,7 +605,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 0) /** - * EXPECT_GE(expected, seen) + * EXPECT_GE() * * @expected: expected value * @seen: measured value @@ -616,7 +616,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 0) /** - * EXPECT_NULL(seen) + * EXPECT_NULL() * * @seen: measured value * @@ -626,7 +626,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 0) /** - * EXPECT_TRUE(seen) + * EXPECT_TRUE() * * @seen: measured value * @@ -636,7 +636,7 @@ __EXPECT(0, "0", seen, #seen, !=, 0) /** - * EXPECT_FALSE(seen) + * EXPECT_FALSE() * * @seen: measured value * @@ -646,7 +646,7 @@ __EXPECT(0, "0", seen, #seen, ==, 0) /** - * EXPECT_STREQ(expected, seen) + * EXPECT_STREQ() * * @expected: expected value * @seen: measured value @@ -657,7 +657,7 @@ __EXPECT_STR(expected, seen, ==, 0) /** - * EXPECT_STRNE(expected, seen) + * EXPECT_STRNE() * * @expected: expected value * @seen: measured value -- cgit From 1e6f5dcc1b9ec9068f5d38331cec38b35498edf5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Oct 2020 16:36:45 -0700 Subject: tools, bpftool: Avoid array index warnings. The bpf_caps array is shorter without CAP_BPF, avoid out of bounds reads if this isn't defined. Working around this avoids -Wno-array-bounds with clang. Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Reviewed-by: Tobias Klauser Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201027233646.3434896-1-irogers@google.com --- tools/bpf/bpftool/feature.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index a43a6f10b564..359960a8f1de 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -843,9 +843,14 @@ static int handle_perms(void) else p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'", capability_msg(bpf_caps, 0), +#ifdef CAP_BPF capability_msg(bpf_caps, 1), capability_msg(bpf_caps, 2), - capability_msg(bpf_caps, 3)); + capability_msg(bpf_caps, 3) +#else + "", "", "", "", "", "" +#endif /* CAP_BPF */ + ); goto exit_free; } -- cgit From 0698ac66e01019528f0db4191ae3aaf9978e67da Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Oct 2020 16:36:46 -0700 Subject: tools, bpftool: Remove two unused variables. Avoid an unused variable warning. Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Reviewed-by: Tobias Klauser Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201027233646.3434896-2-irogers@google.com --- tools/bpf/bpftool/skeleton/profiler.bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 4e3512f700c0..ce5b65e07ab1 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -70,7 +70,7 @@ int BPF_PROG(fentry_XXX) static inline void fexit_update_maps(u32 id, struct bpf_perf_event_value *after) { - struct bpf_perf_event_value *before, diff, *accum; + struct bpf_perf_event_value *before, diff; before = bpf_map_lookup_elem(&fentry_readings, &id); /* only account samples with a valid fentry_reading */ @@ -95,7 +95,7 @@ int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value readings[MAX_NUM_MATRICS]; u32 cpu = bpf_get_smp_processor_id(); - u32 i, one = 1, zero = 0; + u32 i, zero = 0; int err; u64 *count; -- cgit From af8afcf1fdd5f365f70e2386c2d8c7a1abd853d7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 29 Oct 2020 03:56:05 +0100 Subject: wireguard: selftests: check that route_me_harder packets use the right sk If netfilter changes the packet mark, the packet is rerouted. The ip_route_me_harder family of functions fails to use the right sk, opting to instead use skb->sk, resulting in a routing loop when used with tunnels. With the next change fixing this issue in netfilter, test for the relevant condition inside our test suite, since wireguard was where the bug was discovered. Reported-by: Chen Minqiang Signed-off-by: Jason A. Donenfeld Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/wireguard/netns.sh | 8 ++++++++ tools/testing/selftests/wireguard/qemu/kernel.config | 2 ++ 2 files changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index d77f4829f1e0..74c69b75f6f5 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -316,6 +316,14 @@ pp sleep 3 n2 ping -W 1 -c 1 192.168.241.1 n1 wg set wg0 peer "$pub2" persistent-keepalive 0 +# Test that sk_bound_dev_if works +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 +# What about when the mark changes and the packet must be rerouted? +n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1 +n1 ping -c 1 -W 1 192.168.241.2 # First the boring case +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case +n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1 + # Test that onion routing works, even when it loops n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 ip1 addr add 192.168.242.1/24 dev wg0 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index d531de13c95b..4eecb432a66c 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -18,10 +18,12 @@ CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MARK=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_NAT=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y -- cgit From afabdf3338728c3aaa9f55d127e903dcd5f4acc7 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sun, 1 Nov 2020 17:08:07 -0800 Subject: epoll: add a selftest for epoll timeout race Add a test case to ensure an event is observed by at least one poller when an epoll timeout is used. Signed-off-by: Guantao Liu Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Reviewed-by: Eric Dumazet Reviewed-by: Khazhismel Kumykov Acked-by: Willem de Bruijn Cc: Al Viro Cc: Davidlohr Bueso Link: https://lkml.kernel.org/r/20201028180202.952079-2-soheil.kdev@gmail.com Signed-off-by: Linus Torvalds --- .../filesystems/epoll/epoll_wakeup_test.c | 95 ++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index d979ff14775a..8f82f99f7748 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3282,4 +3282,99 @@ TEST(epoll60) close(ctx.epfd); } +struct epoll61_ctx { + int epfd; + int evfd; +}; + +static void *epoll61_write_eventfd(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + int64_t l = 1; + + usleep(10950); + write(ctx->evfd, &l, sizeof(l)); + return NULL; +} + +static void *epoll61_epoll_with_timeout(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + int n; + + n = epoll_wait(ctx->epfd, events, 1, 11); + /* + * If epoll returned the eventfd, write on the eventfd to wake up the + * blocking poller. + */ + if (n == 1) { + int64_t l = 1; + + write(ctx->evfd, &l, sizeof(l)); + } + return NULL; +} + +static void *epoll61_blocking_epoll(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + + epoll_wait(ctx->epfd, events, 1, -1); + return NULL; +} + +TEST(epoll61) +{ + struct epoll61_ctx ctx; + struct epoll_event ev; + int i, r; + + ctx.epfd = epoll_create1(0); + ASSERT_GE(ctx.epfd, 0); + ctx.evfd = eventfd(0, EFD_NONBLOCK); + ASSERT_GE(ctx.evfd, 0); + + ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP; + ev.data.ptr = NULL; + r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev); + ASSERT_EQ(r, 0); + + /* + * We are testing a race. Repeat the test case 1000 times to make it + * more likely to fail in case of a bug. + */ + for (i = 0; i < 1000; i++) { + pthread_t threads[3]; + int n; + + /* + * Start 3 threads: + * Thread 1 sleeps for 10.9ms and writes to the evenfd. + * Thread 2 calls epoll with a timeout of 11ms. + * Thread 3 calls epoll with a timeout of -1. + * + * The eventfd write by Thread 1 should either wakeup Thread 2 + * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the + * eventfd to wake up Thread 3. + * + * If no events are missed, all three threads should eventually + * be joinable. + */ + ASSERT_EQ(pthread_create(&threads[0], NULL, + epoll61_write_eventfd, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[1], NULL, + epoll61_epoll_with_timeout, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[2], NULL, + epoll61_blocking_epoll, &ctx), 0); + + for (n = 0; n < ARRAY_SIZE(threads); ++n) + ASSERT_EQ(pthread_join(threads[n], NULL), 0); + } + + close(ctx.epfd); + close(ctx.evfd); +} + TEST_HARNESS_MAIN -- cgit From 7a078d2d18801bba7bde7337a823d7342299acf7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 29 Oct 2020 15:37:07 -0700 Subject: libbpf, hashmap: Fix undefined behavior in hash_bits If bits is 0, the case when the map is empty, then the >> is the size of the register which is undefined behavior - on x86 it is the same as a shift by 0. Fix by handling the 0 case explicitly and guarding calls to hash_bits for empty maps in hashmap__for_each_key_entry and hashmap__for_each_entry_safe. Fixes: e3b924224028 ("libbpf: add resizable non-thread safe internal hashmap") Suggested-by: Andrii Nakryiko , Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201029223707.494059-1-irogers@google.com --- tools/lib/bpf/hashmap.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index d9b385fe808c..10a4c4cd13cf 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -15,6 +15,9 @@ static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ + if (bits == 0) + return 0; + #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) /* LP64 case */ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); @@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur; \ cur = cur->next) \ if (map->equal_fn(cur->key, (_key), map->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ if (map->equal_fn(cur->key, (_key), map->ctx)) -- cgit From b773ea650576f14442f7a546f2b15e64b10ed0eb Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Wed, 28 Oct 2020 13:59:00 -0300 Subject: perf tools: Remove LTO compiler options when building perl support To avoid breaking the build by mixing files compiled with things coming from distro specific compiler options for perl with the rest of perf, i.e. to avoid this: `.gnu.debuglto_.debug_macro' referenced in section `.gnu.debuglto_.debug_macro' of /tmp/build/perf/util/scripting-engines/perf-in.o: defined in discarded section `.gnu.debuglto_.debug_macro[wm4.stdcpredef.h.19.8dc41bed5d9037ff9622e015fb5f0ce3]' of /tmp/build/perf/util/scripting-engines/perf-in.o Noticed on Fedora 33. Signed-off-by: Justin M. Forbes Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1593431 Cc: Jiri Olsa Link: https://src.fedoraproject.org/rpms/kernel-tools/c/589a32b62f0c12516ab7b34e3dd30d450145bfa4?branch=master Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6890fc4b063a..ce8516e4de34 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -749,6 +749,7 @@ else PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) -- cgit From e555b4b8d7b2844a9e48e06a7c3e4f9e44af847f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:26:45 -0300 Subject: perf tools: Update copy of libbpf's hashmap.c To pick the changes in: 85367030a6c7ef33 ("libbpf: Centralize poisoning and poison reallocarray()") 7d9c71e10baa3496 ("libbpf: Extract generic string hashing function for reuse") That don't entail any changes in tools/perf. This addresses this perf build warning: Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h' diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h Not a kernel ABI, its just that this uses the mechanism in place for checking kernel ABI files drift. Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hashmap.c | 3 +++ tools/perf/util/hashmap.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c index a405dad068f5..3c20b126d60d 100644 --- a/tools/perf/util/hashmap.c +++ b/tools/perf/util/hashmap.c @@ -15,6 +15,9 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index e0af36b0e5d8..d9b385fe808c 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits) #endif } +/* generic C-string hashing function */ +static inline size_t str_hash(const char *s) +{ + size_t h = 0; + + while (*s) { + h = h * 31 + *s; + s++; + } + return h; +} + typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); -- cgit From 263e452eff397b370e39d464c8cbd30f6bd59fb9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:29:30 -0300 Subject: tools headers UAPI: Update process_madvise affected files To pick the changes from: ecb8ac8b1f146915 ("mm/madvise: introduce process_madvise() syscall: an external memory hinting API") That addresses these perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Linus Torvalds Cc: Minchan Kim Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 4 +++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 11 +++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index f2b5d72a46c2..2056318988f7 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2) __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) #define __NR_faccessat2 439 __SYSCALL(__NR_faccessat2, sys_faccessat2) +#define __NR_process_madvise 440 +__SYSCALL(__NR_process_madvise, sys_process_madvise) #undef __NR_syscalls -#define __NR_syscalls 440 +#define __NR_syscalls 441 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 347809649ba2..379819244b91 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -361,12 +361,13 @@ 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise # -# x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. The __x32_compat_sys stubs are created -# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 -# is defined. +# Due to a historical design error, certain syscalls are numbered differently +# in x32 as compared to native x86_64. These syscalls have numbers 512-547. +# Do not add new syscalls to this range. Numbers 548 and above are available +# for non-x32 use. # 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn @@ -404,3 +405,5 @@ 545 x32 execveat compat_sys_execveat 546 x32 preadv2 compat_sys_preadv64v2 547 x32 pwritev2 compat_sys_pwritev64v2 +# This is the end of the legacy x32 range. Numbers 548 and above are +# not special and are not to be used for x32-specific syscalls. -- cgit From ab8bf5f2e0321f254590ad81c6e230185d88b4e5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 16 Oct 2020 14:47:18 +0300 Subject: perf tools: Fix crash with non-jited bpf progs The addr in PERF_RECORD_KSYMBOL events for non-jited bpf progs points to the bpf interpreter, ie. within kernel text section. When processing the unregister event, this causes unexpected removal of vmlinux_map, crashing perf later in cleanup: # perf record -- timeout --signal=INT 2s /usr/share/bcc/tools/execsnoop PCOMM PID PPID RET ARGS [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.208 MB perf.data (5155 samples) ] perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed. Aborted (core dumped) # perf script -D|grep KSYM 0 0xa40 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6 0 0xab0 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_8c42dee26e8cd4c2 0 0xb20 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6 108563691893 0x33d98 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3b0 len 0 type 1 flags 0x0 name bpf_prog_bc5697a410556fc2_syscall__execve 108568518458 0x34098 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3f0 len 0 type 1 flags 0x0 name bpf_prog_45e2203c2928704d_do_ret_sys_execve 109301967895 0x34830 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3b0 len 0 type 1 flags 0x1 name bpf_prog_bc5697a410556fc2_syscall__execve 109302007356 0x348b0 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3f0 len 0 type 1 flags 0x1 name bpf_prog_45e2203c2928704d_do_ret_sys_execve perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed. Here the addresses match the bpf interpreter: # grep -e ffffffffa9b6b530 -e ffffffffa9b6b3b0 -e ffffffffa9b6b3f0 /proc/kallsyms ffffffffa9b6b3b0 t __bpf_prog_run224 ffffffffa9b6b3f0 t __bpf_prog_run192 ffffffffa9b6b530 t __bpf_prog_run32 Fix by not allowing vmlinux_map to be removed by PERF_RECORD_KSYMBOL unregister event. Signed-off-by: Tommi Rantala Acked-by: Jiri Olsa Tested-by: Jiri Olsa Link: https://lore.kernel.org/r/20201016114718.54332-1-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 11 ++++++++++- tools/perf/util/symbol.c | 7 +++++++ tools/perf/util/symbol.h | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7d4194ffc5b0..15385ea00190 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -786,11 +786,20 @@ static int machine__process_ksymbol_unregister(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { + struct symbol *sym; struct map *map; map = maps__find(&machine->kmaps, event->ksymbol.addr); - if (map) + if (!map) + return 0; + + if (map != machine->vmlinux_map) maps__remove(&machine->kmaps, map); + else { + sym = dso__find_symbol(map->dso, map->map_ip(map, map->start)); + if (sym) + dso__delete_symbol(map->dso, sym); + } return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 6138866665df..0d14abdf3d72 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -515,6 +515,13 @@ void dso__insert_symbol(struct dso *dso, struct symbol *sym) } } +void dso__delete_symbol(struct dso *dso, struct symbol *sym) +{ + rb_erase_cached(&sym->rb_node, &dso->symbols); + symbol__delete(sym); + dso__reset_find_symbol_cache(dso); +} + struct symbol *dso__find_symbol(struct dso *dso, u64 addr) { if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f4801c488def..954d6a049ee2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -131,6 +131,8 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); void dso__insert_symbol(struct dso *dso, struct symbol *sym); +void dso__delete_symbol(struct dso *dso, + struct symbol *sym); struct symbol *dso__find_symbol(struct dso *dso, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); -- cgit From a6293f36ac92ab513771a98efe486477be2f981f Mon Sep 17 00:00:00 2001 From: Stanislav Ivanichkin Date: Tue, 27 Oct 2020 12:43:57 +0300 Subject: perf trace: Fix segfault when trying to trace events by cgroup # ./perf trace -e sched:sched_switch -G test -a sleep 1 perf: Segmentation fault Obtained 11 stack frames. ./perf(sighandler_dump_stack+0x43) [0x55cfdc636db3] /lib/x86_64-linux-gnu/libc.so.6(+0x3efcf) [0x7fd23eecafcf] ./perf(parse_cgroups+0x36) [0x55cfdc673f36] ./perf(+0x3186ed) [0x55cfdc70d6ed] ./perf(parse_options_subcommand+0x629) [0x55cfdc70e999] ./perf(cmd_trace+0x9c2) [0x55cfdc5ad6d2] ./perf(+0x1e8ae0) [0x55cfdc5ddae0] ./perf(+0x1e8ded) [0x55cfdc5ddded] ./perf(main+0x370) [0x55cfdc556f00] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe6) [0x7fd23eeadb96] ./perf(_start+0x29) [0x55cfdc557389] Segmentation fault # It happens because "struct trace" in option->value is passed to the parse_cgroups function instead of "struct evlist". Fixes: 9ea42ba4411ac ("perf trace: Support setting cgroups as targets") Signed-off-by: Stanislav Ivanichkin Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Dmitry Monakhov Link: http://lore.kernel.org/lkml/20201027094357.94881-1-sivanichkin@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 44a75f234db1..de80534473af 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4639,9 +4639,9 @@ do_concat: err = 0; if (lists[0]) { - struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", - "event selector. use 'perf list' to list available events", - parse_events_option); + struct option o = { + .value = &trace->evlist, + }; err = parse_events_option(&o, lists[0], 0); } out: @@ -4655,9 +4655,12 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u { struct trace *trace = opt->value; - if (!list_empty(&trace->evlist->core.entries)) - return parse_cgroups(opt, str, unset); - + if (!list_empty(&trace->evlist->core.entries)) { + struct option o = { + .value = &trace->evlist, + }; + return parse_cgroups(&o, str, unset); + } trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); return 0; -- cgit From 0dfbe4c646bf06a85c3d70572a8b8aa6ebffe3d5 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 23 Oct 2020 08:53:34 +0800 Subject: perf vendor events: Fix DRAM_BW_Use 0 issue for CLX/SKX Ian reports an issue that the metric DRAM_BW_Use often remains 0. The metric expression for DRAM_BW_Use on CLX/SKX: "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time" The counts of uncore_imc/cas_count_read/ and uncore_imc/cas_count_write/ are scaled up by 64, that is to turn a count of cache lines into bytes, the count is then divided by 1000000000 to give GB. However, the counts of uncore_imc/cas_count_read/ and uncore_imc/cas_count_write/ have been scaled yet. The scale values are from sysfs, such as /sys/devices/uncore_imc_0/events/cas_count_read.scale. It's 6.103515625e-5 (64 / 1024.0 / 1024.0). So if we use original metric expression, the result is not correct. But the difficulty is, for SKL client, the counts are not scaled. The metric expression for DRAM_BW_Use on SKL: "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000" root@kbl-ppc:~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 190 arb/event=0x84,umask=0x1/ # 1.86 DRAM_BW_Use 29,093,178 arb/event=0x81,umask=0x1/ 1,000,703,287 ns duration_time 1.000703287 seconds time elapsed The result is expected. So the easy way is just change the metric expression for CLX/SKX. This patch changes the metric expression to: "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time" 1048576 = 1024 * 1024. Before (tested on CLX): root@lkp-csl-2sp5 ~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 765.35 MiB uncore_imc/cas_count_read/ # 0.00 DRAM_BW_Use 5.42 MiB uncore_imc/cas_count_write/ 1001515088 ns duration_time 1.001515088 seconds time elapsed After: root@lkp-csl-2sp5 ~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 767.95 MiB uncore_imc/cas_count_read/ # 0.80 DRAM_BW_Use 5.02 MiB uncore_imc/cas_count_write/ 1001900010 ns duration_time 1.001900010 seconds time elapsed Fixes: 038d3b53c284 ("perf vendor events intel: Update CascadelakeX events to v1.08") Fixes: b5ff7f2799a4 ("perf vendor events: Update SkylakeX events to v1.21") Signed-off-by: Jin Yao Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201023005334.7869-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index de3193552277..00f4fcffa815 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -329,7 +329,7 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time", "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index f31794d3b926..0dd8b13b5cfb 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -323,7 +323,7 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time", "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, -- cgit From 9ae1e990f1ab522b98baefbfebf3cbac1a2cfac2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 28 Oct 2020 09:11:23 +0100 Subject: perf tools: Remove broken __no_tail_call attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GCC specific __attribute__((optimize)) attribute does not what is commonly expected and is explicitly recommended against using in production code by the GCC people. Unlike what is often expected, it doesn't add to the optimization flags, but it fully replaces them, loosing any and all optimization flags provided by the compiler commandline. The only guaranteed upon means of inhibiting tail-calls is by placing a volatile asm with side-effects after the call such that the tail-call simply cannot be done. Given the original commit wasn't specific on which calls were the problem, this removal might re-introduce the problem, which can then be re-analyzed and cured properly. Signed-off-by: Peter Zijlstra Acked-by: Ard Biesheuvel Acked-by: Miguel Ojeda Cc: Alexei Starovoitov Cc: Arnd Bergmann Cc: Arvind Sankar Cc: Daniel Borkmann Cc: Geert Uytterhoeven Cc: Ian Rogers Cc: Josh Poimboeuf Cc: Kees Kook Cc: Martin Liška Cc: Nick Desaulniers Cc: Randy Dunlap Cc: Thomas Gleixner Link: http://lore.kernel.org/lkml/20201028081123.GT2628@hirez.programming.kicks-ass.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 12 ------------ tools/include/linux/compiler.h | 3 --- tools/perf/tests/dwarf-unwind.c | 10 +++++----- 3 files changed, 5 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index b9d4322e1e65..95c072b70d0e 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -27,18 +27,6 @@ #define __pure __attribute__((pure)) #endif #define noinline __attribute__((noinline)) -#ifdef __has_attribute -#if __has_attribute(disable_tail_calls) -#define __no_tail_call __attribute__((disable_tail_calls)) -#endif -#endif -#ifndef __no_tail_call -#if GCC_VERSION > 40201 -#define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls"))) -#else -#define __no_tail_call -#endif -#endif #ifndef __packed #define __packed __attribute__((packed)) #endif diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 2b3f7353e891..d22a974372c0 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -47,9 +47,6 @@ #ifndef noinline #define noinline #endif -#ifndef __no_tail_call -#define __no_tail_call -#endif /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2491d167bf76..83638097c3bc 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -95,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) +noinline int test_dwarf_unwind__thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -126,7 +126,7 @@ __no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) +noinline int test_dwarf_unwind__compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -145,7 +145,7 @@ __no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) return p1 - p2; } -__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) +noinline int test_dwarf_unwind__krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -164,12 +164,12 @@ __no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) return global_unwind_retval; } -__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread) +noinline int test_dwarf_unwind__krava_2(struct thread *thread) { return test_dwarf_unwind__krava_3(thread); } -__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread) +noinline int test_dwarf_unwind__krava_1(struct thread *thread) { return test_dwarf_unwind__krava_2(thread); } -- cgit From d0e7b0c71fbb653de90a7163ef46912a96f0bdaf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Oct 2020 08:24:38 -0300 Subject: perf scripting python: Avoid declaring function pointers with a visibility attribute To avoid this: util/scripting-engines/trace-event-python.c: In function 'python_start_script': util/scripting-engines/trace-event-python.c:1595:2: error: 'visibility' attribute ignored [-Werror=attributes] 1595 | PyMODINIT_FUNC (*initfunc)(void); | ^~~~~~~~~~~~~~ That started breaking when building with PYTHON=python3 and these gcc versions (I haven't checked with the clang ones, maybe it breaks there as well): # export PERF_TARBALL=http://192.168.86.5/perf/perf-5.9.0.tar.xz # dm fedora:33 fedora:rawhide 1 107.80 fedora:33 : Ok gcc (GCC) 10.2.1 20201005 (Red Hat 10.2.1-5), clang version 11.0.0 (Fedora 11.0.0-1.fc33) 2 92.47 fedora:rawhide : Ok gcc (GCC) 10.2.1 20201016 (Red Hat 10.2.1-6), clang version 11.0.0 (Fedora 11.0.0-1.fc34) # Avoid that by ditching that 'initfunc' function pointer with its: #define Py_EXPORTED_SYMBOL _attribute_ ((visibility ("default"))) #define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* And just call PyImport_AppendInittab() at the end of the ifdef python3 block with the functions that were being attributed to that initfunc. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7cbd024e3e63..c83c2c6564e0 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1592,7 +1592,6 @@ static void _free_command_line(wchar_t **command_line, int num) static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; - PyMODINIT_FUNC (*initfunc)(void); #if PY_MAJOR_VERSION < 3 const char **command_line; #else @@ -1607,20 +1606,18 @@ static int python_start_script(const char *script, int argc, const char **argv) FILE *fp; #if PY_MAJOR_VERSION < 3 - initfunc = initperf_trace_context; command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; + PyImport_AppendInittab(name, initperf_trace_context); #else - initfunc = PyInit_perf_trace_context; command_line = malloc((argc + 1) * sizeof(wchar_t *)); command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); + PyImport_AppendInittab(name, PyInit_perf_trace_context); #endif - - PyImport_AppendInittab(name, initfunc); Py_Initialize(); #if PY_MAJOR_VERSION < 3 -- cgit From ad6330ac2c5a38e5573cb6ae8ff75288bfd96325 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:38:16 -0300 Subject: tools headers UAPI: Sync prctl.h with the kernel sources To get the changes in: 1c101da8b971a366 ("arm64: mte: Allow user control of the tag check mode via prctl()") af5ce95282dc99d0 ("arm64: mte: Allow user control of the generated random tags via prctl()") Which don't cause any change in tooling, only addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' diff -u tools/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Cc: Adrian Hunter Cc: Catalin Marinas Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 07b4f8131e36..7f0827705c9a 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -233,6 +233,15 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit From 9e228f48980635c187720c0956b39c04db5e8f56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:41:58 -0300 Subject: tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: 13149e8bafc46572 ("drm/i915: add syncobj timeline support") cda9edd02425d790 ("drm/i915: introduce a mechanism to extend execbuf2") That don't result in any changes in tooling, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Lionel Landwerlin Cc: Namhyung Kim Cc: Rodrigo Vivi Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 59 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 00546062e023..fa1f3d62f9a6 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1046,6 +1052,38 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; +/** + * See drm_i915_gem_execbuffer_ext_timeline_fences. + */ +#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 + +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 fence_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of length + * fence_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of length fence_count. Values + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. + */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -1062,8 +1100,14 @@ struct drm_i915_gem_execbuffer2 { __u32 num_cliprects; /** * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY - * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a - * struct drm_i915_gem_exec_fence *fences. + * & I915_EXEC_USE_EXTENSIONS are not set. + * + * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array + * of struct drm_i915_gem_exec_fence and num_cliprects is the length + * of the array. + * + * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a + * single struct i915_user_extension and num_cliprects is 0. */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (0x3f) @@ -1181,7 +1225,16 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_SUBMIT (1 << 20) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list of supported structures are listed in the + * drm_i915_gem_execbuffer_ext enum. + */ +#define I915_EXEC_USE_EXTENSIONS (1 << 21) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit From d0448d6a249b6fc4518181b214d3403dfe2c8075 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:12:52 -0300 Subject: tools headers UAPI: Update fscrypt.h copy To get the changes from: c7f0207b613033c5 ("fscrypt: make "#define fscrypt_policy" user-only") That don't cause any changes in tools/perf, only addresses this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h' diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7875709ccfeb..e5de60336938 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -45,7 +45,6 @@ struct fscrypt_policy_v1 { __u8 flags; __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; }; -#define fscrypt_policy fscrypt_policy_v1 /* * Process-subscribed "logon" key description prefix and payload format. @@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg { __u32 __out_reserved[13]; }; -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ #define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) @@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg { /* old names; don't add anything new here! */ #ifndef __KERNEL__ +#define fscrypt_policy fscrypt_policy_v1 #define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE #define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 #define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 -- cgit From 40a6bbf5149c7302bd7515fb5e2c3d12bac462f5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:21:04 -0300 Subject: tools x86 headers: Update cpufeatures.h headers copies To pick the changes from: 5866e9205b47a983 ("x86/cpu: Add hardware-enforced cache coherency as a CPUID feature") ff4f82816dff28ff ("x86/cpufeatures: Enumerate ENQCMD and ENQCMDS instructions") 360e7c5c4ca4fd8e ("x86/cpufeatures: Add SEV-ES CPU feature") 18ec63faefb3fd31 ("x86/cpufeatures: Enumerate TSX suspend load address tracking instructions") e48cb1a3fb916500 ("x86/resctrl: Enumerate per-thread MBA controls") Which don't cause any changes in tooling, just addresses these build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fenghua Yu Cc: Ian Rogers Cc: Jiri Olsa Cc: Krish Sadhukhan Cc: Kyung Min Park Cc: Namhyung Kim Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 6 +++++- tools/arch/x86/include/asm/disabled-features.h | 9 ++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 2901d5df4366..dad350d42ecf 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -/* free ( 3*32+17) */ +#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -236,6 +236,7 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -288,6 +289,7 @@ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -353,6 +355,7 @@ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ @@ -368,6 +371,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 4ea8584682f9..5861d34f9771 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,12 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_IOMMU_SUPPORT +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -75,7 +81,8 @@ #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ + DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) -- cgit From 8b2fc25a945b125c7ee4c36b048ad65f7c04105e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:24:52 -0300 Subject: tools x86 headers: Update required-features.h header from the kernel To pick the changes from: ecac71816a1829c0 ("x86/paravirt: Use CONFIG_PARAVIRT_XXL instead of CONFIG_PARAVIRT") That don entail any changes in tooling, just addressing these perf tools build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/required-features.h' differs from latest version at 'arch/x86/include/asm/required-features.h' diff -u tools/arch/x86/include/asm/required-features.h arch/x86/include/asm/required-features.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Juergen Gross Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/required-features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 6847d85400a8..3ff0d48469f2 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -54,7 +54,7 @@ #endif #ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL /* Paravirtualized systems may not have PSE or PGE available */ #define NEED_PSE 0 #define NEED_PGE 0 -- cgit From 32b734e09ec38a0bb81d05d37056a95584d14c99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:28:04 -0300 Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: 29dcc60f6a19fb0a ("x86/boot/compressed/64: Add stage1 #VC handler") 36e1be8ada994d50 ("perf/x86/amd/ibs: Fix raw sample data accumulation") 59a854e2f3b90ad2 ("perf/x86/intel: Support TopDown metrics on Ice Lake") 7b2c05a15d29d057 ("perf/x86/intel: Generic support for hardware TopDown metrics") 99e40204e014e066 ("x86/msr: Move the F15h MSRs where they belong") b57de6cd16395be1 ("x86/sev-es: Add SEV-ES Feature Detection") ed7bde7a6dab521e ("cpufreq: intel_pstate: Allow enable/disable energy efficiency") f0f2f9feb4ee6f28 ("x86/msr-index: Define an IA32_PASID MSR") That cause these changes in tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2020-10-19 13:27:33.195274425 -0300 +++ after 2020-10-19 13:27:44.144507610 -0300 @@ -113,6 +113,8 @@ [0x00000309] = "CORE_PERF_FIXED_CTR0", [0x0000030a] = "CORE_PERF_FIXED_CTR1", [0x0000030b] = "CORE_PERF_FIXED_CTR2", + [0x0000030c] = "CORE_PERF_FIXED_CTR3", + [0x00000329] = "PERF_METRICS", [0x00000345] = "IA32_PERF_CAPABILITIES", [0x0000038d] = "CORE_PERF_FIXED_CTR_CTRL", [0x0000038e] = "CORE_PERF_GLOBAL_STATUS", @@ -222,6 +224,7 @@ [0x00000774] = "HWP_REQUEST", [0x00000777] = "HWP_STATUS", [0x00000d90] = "IA32_BNDCFGS", + [0x00000d93] = "IA32_PASID", [0x00000da0] = "IA32_XSS", [0x00000dc0] = "LBR_INFO_0", [0x00000ffc] = "IA32_BNDCFGS_RSVD", @@ -279,6 +282,7 @@ [0xc0010115 - x86_AMD_V_KVM_MSRs_offset] = "VM_IGNNE", [0xc0010117 - x86_AMD_V_KVM_MSRs_offset] = "VM_HSAVE_PA", [0xc001011f - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VIRT_SPEC_CTRL", + [0xc0010130 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV_ES_GHCB", [0xc0010131 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV", [0xc0010140 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_ID_LENGTH", [0xc0010141 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_STATUS", $ Which causes these parts of tools/perf/ to be rebuilt: CC /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o DESCEND plugins GEN /tmp/build/perf/python/perf.so INSTALL trace_plugins LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/per At some point these should just be tables read by perf on demand. This addresses this perf tools build warning: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fenghua Yu Cc: Ian Rogers Cc: Jiri Olsa Cc: Joerg Roedel Cc: Kan Liang Cc: Kim Phillips Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Srinivas Pandruvada Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 2859ee4f39a8..972a34d93505 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -257,6 +257,9 @@ #define MSR_IA32_LASTINTFROMIP 0x000001dd #define MSR_IA32_LASTINTTOIP 0x000001de +#define MSR_IA32_PASID 0x00000d93 +#define MSR_IA32_PASID_VALID BIT_ULL(31) + /* DEBUGCTLMSR bits (others vary by model): */ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 @@ -464,11 +467,15 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL< Date: Mon, 19 Oct 2020 13:36:41 -0300 Subject: tools UAPI: Update copy of linux/mman.h from the kernel sources e47168f3d1b14af5 ("powerpc/8xx: Support 16k hugepages with 4k pages") That don't cause any changes in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h' diff -u tools/include/uapi/linux/mman.h include/uapi/linux/mman.h Cc: Adrian Hunter Cc: Christophe Leroy Cc: Ian Rogers Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mman.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index 923cc162609c..f55bc680b5b0 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -27,6 +27,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB -- cgit From aa04899a13078e4181146212555a1bbaa387d2c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:42:55 -0300 Subject: tools kvm headers: Update KVM headers from the kernel sources Some should cause changes in tooling, like the one adding LAST_EXCP, but the way it is structured end up not making that happen. The new SVM_EXIT_INVPCID should get used by arch/x86/util/kvm-stat.c, in the svm_exit_reasons table. The tools/perf/trace/beauty part has scripts to catch changes and automagically create tables, like tools/perf/trace/beauty/kvm_ioctl.sh, but changes are needed to make tools/perf/arch/x86/util/kvm-stat.c catch those automatically. These were handled by the existing scripts: $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2020-11-03 08:43:52.910728608 -0300 +++ after 2020-11-03 08:44:04.273959984 -0300 @@ -89,6 +89,7 @@ [0xbf] = "SET_NESTED_STATE", [0xc0] = "CLEAR_DIRTY_LOG", [0xc1] = "GET_SUPPORTED_HV_CPUID", + [0xc6] = "X86_SET_MSR_FILTER", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2020-11-03 08:45:55.522225198 -0300 +++ after 2020-11-03 08:46:12.881578666 -0300 @@ -37,4 +37,5 @@ [0x71] = "VDPA_GET_STATUS", [0x73] = "VDPA_GET_CONFIG", [0x76] = "VDPA_GET_VRING_NUM", + [0x78] = "VDPA_GET_IOVA_RANGE", }; $ This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/sie.h' differs from latest version at 'arch/s390/include/uapi/asm/sie.h' diff -u tools/arch/s390/include/uapi/asm/sie.h arch/s390/include/uapi/asm/sie.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h' diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h Cc: Adrian Hunter Cc: Alexander Yarygin Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Cornelia Huck Cc: David Ahern Cc: Ian Rogers Cc: Jiri Olsa Cc: Joerg Roedel Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 25 +++++++++++++++++++++++++ tools/arch/s390/include/uapi/asm/sie.h | 2 +- tools/arch/x86/include/uapi/asm/kvm.h | 20 ++++++++++++++++++++ tools/arch/x86/include/uapi/asm/svm.h | 13 +++++++++++++ tools/include/uapi/linux/kvm.h | 19 +++++++++++++++++++ tools/include/uapi/linux/vhost.h | 4 ++++ 6 files changed, 82 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index ba85bb23f060..1c17c3a24411 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -159,6 +159,21 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. + */ +struct kvm_pmu_event_filter { + __u16 base_event; + __u16 nevents; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + + __u8 action; + __u8 pad[3]; +}; + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { @@ -242,6 +257,15 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 + +/* + * Only two states can be presented by the host kernel: + * - NOT_REQUIRED: the guest doesn't need to do anything + * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available) + * + * All the other values are deprecated. The host still accepts all + * values (they are ABI), but will narrow them to the above two. + */ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 @@ -329,6 +353,7 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h index 6ca1e68d7103..ede318653c87 100644 --- a/tools/arch/s390/include/uapi/asm/sie.h +++ b/tools/arch/s390/include/uapi/asm/sie.h @@ -29,7 +29,7 @@ { 0x13, "SIGP conditional emergency signal" }, \ { 0x15, "SIGP sense running" }, \ { 0x16, "SIGP set multithreading"}, \ - { 0x17, "SIGP store additional status ait address"} + { 0x17, "SIGP store additional status at address"} #define icpt_prog_codes \ { 0x0001, "Prog Operation" }, \ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 0780f97c1850..89e5f3d1bba8 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -192,6 +192,26 @@ struct kvm_msr_list { __u32 indices[0]; }; +/* Maximum size of any access bitmap in bytes */ +#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600 + +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range { +#define KVM_MSR_FILTER_READ (1 << 0) +#define KVM_MSR_FILTER_WRITE (1 << 1) + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* MSR index the bitmap starts at */ + __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */ +}; + +#define KVM_MSR_FILTER_MAX_RANGES 16 +struct kvm_msr_filter { +#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) + __u32 flags; + struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; struct kvm_cpuid_entry { __u32 function; diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 2e8a30f06c74..f1d8307454e0 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -29,6 +29,7 @@ #define SVM_EXIT_WRITE_DR6 0x036 #define SVM_EXIT_WRITE_DR7 0x037 #define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_LAST_EXCP 0x05f #define SVM_EXIT_INTR 0x060 #define SVM_EXIT_NMI 0x061 #define SVM_EXIT_SMI 0x062 @@ -76,10 +77,21 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +/* SEV-ES software-defined VMGEXIT events */ +#define SVM_VMGEXIT_MMIO_READ 0x80000001 +#define SVM_VMGEXIT_MMIO_WRITE 0x80000002 +#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003 +#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004 +#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 +#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 +#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff + #define SVM_EXIT_ERR -1 #define SVM_EXIT_REASONS \ @@ -171,6 +183,7 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7d8eced6f459..ca41220b40b8 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -248,6 +248,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_IOAPIC_EOI 26 #define KVM_EXIT_HYPERV 27 #define KVM_EXIT_ARM_NISV 28 +#define KVM_EXIT_X86_RDMSR 29 +#define KVM_EXIT_X86_WRMSR 30 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -413,6 +415,17 @@ struct kvm_run { __u64 esr_iss; __u64 fault_ipa; } arm_nisv; + /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ + struct { + __u8 error; /* user -> kernel */ + __u8 pad[7]; +#define KVM_MSR_EXIT_REASON_INVAL (1 << 0) +#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) +#define KVM_MSR_EXIT_REASON_FILTER (1 << 2) + __u32 reason; /* kernel -> user */ + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; /* Fix the size of the union. */ char padding[256]; }; @@ -1037,6 +1050,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 #define KVM_CAP_STEAL_TIME 187 +#define KVM_CAP_X86_USER_SPACE_MSR 188 +#define KVM_CAP_X86_MSR_FILTER 189 +#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #ifdef KVM_CAP_IRQ_ROUTING @@ -1538,6 +1554,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_MSR_FILTER */ +#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 75232185324a..c998860d7bbc 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -146,4 +146,8 @@ /* Set event fd for config interrupt*/ #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) #endif -- cgit From a9e27f5f9827eab25b76155fddcc22ddeeed58d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:49:59 -0300 Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h The diff is just tabs versus spaces, trivial. This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3e5dcdd48a49..b95d3c485d27 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1196,7 +1196,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ /* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 38 +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ -- cgit From 42cc0e70a21faa8e7d7ea8713a3f9cd64bd3f60a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:52:11 -0300 Subject: tools include UAPI: Update linux/mount.h copy To pick the changes from: dab741e0e02bd3c4 ("Add a "nosymfollow" mount option.") That ends up adding support for the new MS_NOSYMFOLLOW mount flag: $ tools/perf/trace/beauty/mount_flags.sh > before $ cp include/uapi/linux/mount.h tools/include/uapi/linux/mount.h $ tools/perf/trace/beauty/mount_flags.sh > after $ diff -u before after --- before 2020-11-03 08:51:28.117997454 -0300 +++ after 2020-11-03 08:51:38.992218869 -0300 @@ -7,6 +7,7 @@ [32 ? (ilog2(32) + 1) : 0] = "REMOUNT", [64 ? (ilog2(64) + 1) : 0] = "MANDLOCK", [128 ? (ilog2(128) + 1) : 0] = "DIRSYNC", + [256 ? (ilog2(256) + 1) : 0] = "NOSYMFOLLOW", [1024 ? (ilog2(1024) + 1) : 0] = "NOATIME", [2048 ? (ilog2(2048) + 1) : 0] = "NODIRATIME", [4096 ? (ilog2(4096) + 1) : 0] = "BIND", $ So now one can use it in --filter expressions for tracepoints. This silences this perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/linux/mount.h' differs from latest version at 'include/uapi/linux/mount.h' diff -u tools/include/uapi/linux/mount.h include/uapi/linux/mount.h Cc: Adrian Hunter Cc: Al Viro Cc: Ian Rogers Cc: Jiri Olsa Cc: Mattias Nissler Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 96a0240f23fe..dd8306ea336c 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -16,6 +16,7 @@ #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ #define MS_NOATIME 1024 /* Do not update access times. */ #define MS_NODIRATIME 2048 /* Do not update directory access times */ #define MS_BIND 4096 -- cgit From 86449b12f626a65d2a2ecfada1e024488471f9e2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 30 Oct 2020 16:54:31 -0700 Subject: perf hists browser: Increase size of 'buf' in perf_evsel__hists_browse() Making perf with gcc-9.1.1 generates the following warning: CC ui/browsers/hists.o ui/browsers/hists.c: In function 'perf_evsel__hists_browse': ui/browsers/hists.c:3078:61: error: '%d' directive output may be \ truncated writing between 1 and 11 bytes into a region of size \ between 2 and 12 [-Werror=format-truncation=] 3078 | "Max event group index to sort is %d (index from 0 to %d)", | ^~ ui/browsers/hists.c:3078:7: note: directive argument in the range [-2147483648, 8] 3078 | "Max event group index to sort is %d (index from 0 to %d)", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/stdio.h:937, from ui/browsers/hists.c:5: IOW, the string in line 3078 might be too long for buf[] of 64 bytes. Fix this by increasing the size of buf[] to 128. Fixes: dbddf1747441 ("perf report/top TUI: Support hotkeys to let user select any event for sorting") Signed-off-by: Song Liu Acked-by: Jiri Olsa Cc: Jin Yao Cc: stable@vger.kernel.org # v5.7+ Link: http://lore.kernel.org/lkml/20201030235431.534417-1-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a07626f07208..b0e1880cf992 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2963,7 +2963,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, struct popup_action actions[MAX_OPTIONS]; int nr_options = 0; int key = -1; - char buf[64]; + char buf[128]; int delay_secs = hbt ? hbt->refresh : 0; #define HIST_BROWSER_HELP_COMMON \ -- cgit From 6311951d4f8f28c43b554ff0719027884bedd7e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Nov 2020 00:31:02 +0100 Subject: perf tools: Initialize output buffer in build_id__sprintf We display garbage for undefined build_id objects, because we don't initialize the output buffer. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201101233103.3537427-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 8763772f1095..6b410c3d52dc 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -102,6 +102,8 @@ int build_id__sprintf(const struct build_id *build_id, char *bf) const u8 *raw = build_id->data; size_t i; + bf[0] = 0x0; + for (i = 0; i < build_id->size; ++i) { sprintf(bid, "%02x", *raw); ++raw; -- cgit From fe01adb72356a4e2f8735e4128af85921ca98fa1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Nov 2020 00:31:03 +0100 Subject: perf tools: Add missing swap for ino_generation We are missing swap for ino_generation field. Fixes: 5c5e854bc760 ("perf tools: Add attr->mmap2 support") Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201101233103.3537427-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7a5f03764702..d20b16ee7377 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -595,6 +595,7 @@ static void perf_event__mmap2_swap(union perf_event *event, event->mmap2.maj = bswap_32(event->mmap2.maj); event->mmap2.min = bswap_32(event->mmap2.min); event->mmap2.ino = bswap_64(event->mmap2.ino); + event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); if (sample_id_all) { void *data = &event->mmap2.filename; -- cgit From 2c589d933e54d183ee2a052971b730e423c62031 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Nov 2020 23:02:28 +0900 Subject: perf tools: Add missing swap for cgroup events It was missed to add a swap function for PERF_RECORD_CGROUP. Fixes: ba78c1c5461c ("perf tools: Basic support for CGROUP event") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201102140228.303657-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d20b16ee7377..098080287c68 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -711,6 +711,18 @@ static void perf_event__namespaces_swap(union perf_event *event, swap_sample_id_all(event, &event->namespaces.link_info[i]); } +static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all) +{ + event->cgroup.id = bswap_64(event->cgroup.id); + + if (sample_id_all) { + void *data = &event->cgroup.path; + + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } +} + static u8 revbyte(u8 b) { int rev = (b >> 4) | ((b & 0xf) << 4); @@ -953,6 +965,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, + [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, -- cgit From 5d020cbd86204e51da05628623a6f9729d4b04c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 09:24:20 -0300 Subject: tools feature: Fixup fast path feature detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 22dd1ac91a776752 ("tools: Remove feature-libelf-mmap feature detection") correctly simplified the this feature detection, but forgot to remove the call to the removed function in the main() function for the test-all.c fast path feature detection, making it fail and thus do all the feature detection individually, fix it. $ cat /tmp/build/perf/feature/test-all.make.output test-all.c: In function ‘main’: test-all.c:188:2: error: implicit declaration of function ‘main_test_libelf_mmap’; did you mean ‘main_test_libelf’? [-Werror=implicit-function-declaration] 188 | main_test_libelf_mmap(); | ^~~~~~~~~~~~~~~~~~~~~ | main_test_libelf cc1: all warnings being treated as errors $ vim tools/build/feature/test-all.c $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ;make V=1 -k O=/tmp/build/perf -C tools/perf install-bin ; perf test python $ cat /tmp/build/perf/feature/test-all.make.output $ Fixes: 22dd1ac91a776752 ("tools: Remove feature-libelf-mmap feature detection") Cc: Alexei Starovoitov Cc: Andrii Nakryiko Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-all.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index a04e81321c66..464873883396 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -185,7 +185,6 @@ int main(int argc, char *argv[]) main_test_libperl(); main_test_hello(); main_test_libelf(); - main_test_libelf_mmap(); main_test_get_current_dir_name(); main_test_gettid(); main_test_glibc(); -- cgit From f78331f74cacb33d87cd60376dacc5bd397959e2 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 3 Nov 2020 10:41:29 +0100 Subject: libbpf: Fix null dereference in xsk_socket__delete Fix a possible null pointer dereference in xsk_socket__delete that will occur if a null pointer is fed into the function. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Reported-by: Andrii Nakryiko Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1604396490-12129-2-git-send-email-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e3c98c007825..504b7a85d445 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -891,13 +891,14 @@ int xsk_umem__delete(struct xsk_umem *umem) void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); - struct xsk_ctx *ctx = xsk->ctx; struct xdp_mmap_offsets off; + struct xsk_ctx *ctx; int err; if (!xsk) return; + ctx = xsk->ctx; if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); -- cgit From 25cf73b9ff88fd4608699a0313f820758b4c252d Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 3 Nov 2020 10:41:30 +0100 Subject: libbpf: Fix possible use after free in xsk_socket__delete Fix a possible use after free in xsk_socket__delete that will happen if xsk_put_ctx() frees the ctx. To fix, save the umem reference taken from the context and just use that instead. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1604396490-12129-3-git-send-email-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 504b7a85d445..9bc537d0b92d 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -892,6 +892,7 @@ void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); struct xdp_mmap_offsets off; + struct xsk_umem *umem; struct xsk_ctx *ctx; int err; @@ -899,6 +900,7 @@ void xsk_socket__delete(struct xsk_socket *xsk) return; ctx = xsk->ctx; + umem = ctx->umem; if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); @@ -918,11 +920,11 @@ void xsk_socket__delete(struct xsk_socket *xsk) xsk_put_ctx(ctx); - ctx->umem->refcount--; + umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. */ - if (xsk->fd != ctx->umem->fd) + if (xsk->fd != umem->fd) close(xsk->fd); free(xsk); } -- cgit From f3ae6c6e8a3ea49076d826c64e63ea78fbf9db43 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:30 +0300 Subject: selftests: proc: fix warning: _GNU_SOURCE redefined Makefile already contains -D_GNU_SOURCE, so we can remove it from the *.c files. Signed-off-by: Tommi Rantala Signed-off-by: Shuah Khan --- tools/testing/selftests/proc/proc-loadavg-001.c | 1 - tools/testing/selftests/proc/proc-self-syscall.c | 1 - tools/testing/selftests/proc/proc-uptime-002.c | 1 - 3 files changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index 471e2aa28077..fb4fe9188806 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 9f6d000c0245..8511dcfe67c7 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..e7ceabed7f51 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include #include -- cgit From 1d44d0dd61b6121b49f25b731f2f7f605cb3c896 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:31 +0300 Subject: selftests: core: use SKIP instead of XFAIL in close_range_test.c XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala Reviewed-by: Kees Cook Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/core/close_range_test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c99b98b0d461..575b391ddc78 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -44,7 +44,7 @@ TEST(close_range) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -52,7 +52,7 @@ TEST(close_range) EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { if (errno == ENOSYS) - XFAIL(return, "close_range() syscall not supported"); + SKIP(return, "close_range() syscall not supported"); } EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); @@ -108,7 +108,7 @@ TEST(close_range_unshare) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -197,7 +197,7 @@ TEST(close_range_unshare_capped) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; -- cgit From afba8b0a2cc532b54eaf4254092f57bba5d7eb65 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:32 +0300 Subject: selftests: clone3: use SKIP instead of XFAIL XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala Reviewed-by: Kees Cook Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 55bd387ce7ec..52d3f0364bda 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore) test_clone3_supported(); EXPECT_EQ(getuid(), 0) - XFAIL(return, "Skipping all tests as non-root\n"); + SKIP(return, "Skipping all tests as non-root"); memset(&set_tid, 0, sizeof(set_tid)); -- cgit From 7d764b685ee1bc73a9fa2b6cb4d42fa72b943145 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:33 +0300 Subject: selftests: binderfs: use SKIP instead of XFAIL XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP"), use SKIP instead. Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP") Signed-off-by: Tommi Rantala Reviewed-by: Kees Cook Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/filesystems/binderfs/binderfs_test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 1d27f52c61e6..477cbb042f5b 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); EXPECT_EQ(ret, 0) { if (errno == ENODEV) - XFAIL(goto out, "binderfs missing"); + SKIP(goto out, "binderfs missing"); TH_LOG("%s - Failed to mount binderfs", strerror(errno)); goto rmdir; } @@ -475,10 +475,10 @@ TEST(binderfs_stress) TEST(binderfs_test_privileged) { if (geteuid() != 0) - XFAIL(return, "Tests are not run as root. Skipping privileged tests"); + SKIP(return, "Tests are not run as root. Skipping privileged tests"); if (__do_binderfs_test(_metadata)) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); } TEST(binderfs_test_unprivileged) @@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged) ret = wait_for_pid(pid); if (ret) { if (ret == 2) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); ASSERT_EQ(ret, 0) { TH_LOG("wait_for_pid() failed"); } -- cgit From f9b7ff0d7f7a466a920424246e7ddc2b84c87e52 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 5 Nov 2020 11:52:30 +0000 Subject: tools/bpftool: Fix attaching flow dissector My earlier patch to reject non-zero arguments to flow dissector attach broke attaching via bpftool. Instead of 0 it uses -1 for target_fd. Fix this by passing a zero argument when attaching the flow dissector. Fixes: 1b514239e859 ("bpf: flow_dissector: Check value of unused flags to BPF_PROG_ATTACH") Reported-by: Jiri Benc Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105115230.296657-1-lmb@cloudflare.com --- tools/bpf/bpftool/prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d942c1e3372c..acdb2c245f0a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -940,7 +940,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd, } if (*attach_type == BPF_FLOW_DISSECTOR) { - *mapfd = -1; + *mapfd = 0; return 0; } -- cgit From d3bec0138bfbe58606fc1d6f57a4cdc1a20218db Mon Sep 17 00:00:00 2001 From: David Verbeiren Date: Wed, 4 Nov 2020 12:23:32 +0100 Subject: bpf: Zero-fill re-used per-cpu map element Zero-fill element values for all other cpus than current, just as when not using prealloc. This is the only way the bpf program can ensure known initial values for all cpus ('onallcpus' cannot be set when coming from the bpf program). The scenario is: bpf program inserts some elements in a per-cpu map, then deletes some (or userspace does). When later adding new elements using bpf_map_update_elem(), the bpf program can only set the value of the new elements for the current cpu. When prealloc is enabled, previously deleted elements are re-used. Without the fix, values for other cpus remain whatever they were when the re-used entry was previously freed. A selftest is added to validate correct operation in above scenario as well as in case of LRU per-cpu map element re-use. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: David Verbeiren Signed-off-by: Alexei Starovoitov Acked-by: Matthieu Baerts Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201104112332.15191-1-david.verbeiren@tessares.net --- tools/testing/selftests/bpf/prog_tests/map_init.c | 214 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_map_init.c | 33 ++++ 2 files changed, 247 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_init.c create mode 100644 tools/testing/selftests/bpf/progs/test_map_init.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c new file mode 100644 index 000000000000..14a31109dd0e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Tessares SA */ + +#include +#include "test_map_init.skel.h" + +#define TEST_VALUE 0x1234 +#define FILL_VALUE 0xdeadbeef + +static int nr_cpus; +static int duration; + +typedef unsigned long long map_key_t; +typedef unsigned long long map_value_t; +typedef struct { + map_value_t v; /* padding */ +} __bpf_percpu_val_align pcpu_map_value_t; + + +static int map_populate(int map_fd, int num) +{ + pcpu_map_value_t value[nr_cpus]; + int i, err; + map_key_t key; + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value, i) = FILL_VALUE; + + for (key = 1; key <= num; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz, + int *map_fd, int populate) +{ + struct test_map_init *skel; + int err; + + skel = test_map_init__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + err = bpf_map__set_type(skel->maps.hashmap1, map_type); + if (!ASSERT_OK(err, "bpf_map__set_type")) + goto error; + + err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto error; + + err = test_map_init__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto error; + + *map_fd = bpf_map__fd(skel->maps.hashmap1); + if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n")) + goto error; + + err = map_populate(*map_fd, populate); + if (!ASSERT_OK(err, "map_populate")) + goto error_map; + + return skel; + +error_map: + close(*map_fd); +error: + test_map_init__destroy(skel); + return NULL; +} + +/* executes bpf program that updates map with key, value */ +static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key, + map_value_t value) +{ + struct test_map_init__bss *bss; + + bss = skel->bss; + + bss->inKey = key; + bss->inValue = value; + bss->inPid = getpid(); + + if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach")) + return -1; + + /* Let tracepoint trigger */ + syscall(__NR_getpgid); + + test_map_init__detach(skel); + + return 0; +} + +static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected) +{ + int i, nzCnt = 0; + map_value_t val; + + for (i = 0; i < nr_cpus; i++) { + val = bpf_percpu(value, i); + if (val) { + if (CHECK(val != expected, "map value", + "unexpected for cpu %d: 0x%llx\n", i, val)) + return -1; + nzCnt++; + } + } + + if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n", + nzCnt)) + return -1; + + return 0; +} + +/* Add key=1 elem with values set for all CPUs + * Delete elem key=1 + * Run bpf prog that inserts new key=1 elem with value=0x1234 + * (bpf prog can only set value for current CPU) + * Lookup Key=1 and check value is as expected for all CPUs: + * value set by bpf prog for one CPU, 0 for all others + */ +static void test_pcpu_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* max 1 elem in map so insertion is forced to reuse freed entry */ + skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* delete element so the entry can be re-used*/ + key = 1; + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "bpf_map_delete_elem")) + goto cleanup; + + /* run bpf prog that inserts new elem, re-using the slot just freed */ + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=1 was re-created by bpf prog */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +/* Add key=1 and key=2 elems with values set for all CPUs + * Run bpf prog that inserts new key=3 elem + * (only for current cpu; other cpus should have initial value = 0) + * Lookup Key=1 and check value is as expected for all CPUs + */ +static void test_pcpu_lru_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* Set up LRU map with 2 elements, values filled for all CPUs. + * With these 2 elements, the LRU map is full + */ + skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* run bpf prog that inserts new key=3 element, re-using LRU slot */ + key = 3; + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=3 replaced one of earlier elements */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +void test_map_init(void) +{ + nr_cpus = bpf_num_possible_cpus(); + if (nr_cpus <= 1) { + printf("%s:SKIP: >1 cpu needed for this test\n", __func__); + test__skip(); + return; + } + + if (test__start_subtest("pcpu_map_init")) + test_pcpu_map_init(); + if (test__start_subtest("pcpu_lru_map_init")) + test_pcpu_lru_map_init(); +} diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c new file mode 100644 index 000000000000..c89d28ead673 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_init.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Tessares SA */ + +#include "vmlinux.h" +#include + +__u64 inKey = 0; +__u64 inValue = 0; +__u32 inPid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hashmap1 SEC(".maps"); + + +SEC("tp/syscalls/sys_enter_getpgid") +int sysenter_getpgid(const void *ctx) +{ + /* Just do it for once, when called from our own test prog. This + * ensures the map value is only updated for a single CPU. + */ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid == inPid) + bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit From df11f7dd5834146defa448acba097e8d7703cc42 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:13 -0700 Subject: selftests: kvm: Fix the segment descriptor layout to match the actual layout Fix the layout of 'struct desc64' to match the layout described in the SDM Vol 3, Chapter 3 "Protected-Mode Memory Management", section 3.4.5 "Segment Descriptors", Figure 3-8 "Segment Descriptor". The test added later in this series relies on this and crashes if this layout is not correct. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-2-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- tools/testing/selftests/kvm/lib/x86_64/processor.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 82b7fe16a824..0a65e7bb5249 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -59,7 +59,7 @@ struct gpr64_regs { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f6eb34eaa0d2..1ccf6c9b3476 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -392,11 +392,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->limit0 = segp->limit & 0xFFFF; desc->base0 = segp->base & 0xFFFF; desc->base1 = segp->base >> 16; - desc->s = segp->s; desc->type = segp->type; + desc->s = segp->s; desc->dpl = segp->dpl; desc->p = segp->present; desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; desc->l = segp->l; desc->db = segp->db; desc->g = segp->g; -- cgit From 85f2a4320ef27ce74b9da0631460561028c48756 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:14 -0700 Subject: selftests: kvm: Clear uc so UCALL_NONE is being properly reported Ensure the out value 'uc' in get_ucall() is properly reporting UCALL_NONE if the call fails. The return value will be correctly reported, however, the out parameter 'uc' will not be. Clear the struct to ensure the correct value is being reported in the out parameter. Signed-off-by: Aaron Lewis Reviewed-by: Andrew Jones Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-3-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/ucall.c | 3 +++ tools/testing/selftests/kvm/lib/s390x/ucall.c | 3 +++ tools/testing/selftests/kvm/lib/x86_64/ucall.c | 3 +++ 3 files changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index c8e0ec20d3bf..2f37b90ee1a9 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { vm_vaddr_t gva; diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index fd589dc9bfab..9d3b0f15249a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index da4d89ad5419..a3489973e290 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; -- cgit From 29faeb9632012d6c3fa4aa33c3d589b9ff18b206 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:15 -0700 Subject: selftests: kvm: Add exception handling to selftests Add the infrastructure needed to enable exception handling in selftests. This allows any of the exception and interrupt vectors to be overridden in the guest. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-4-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 19 ++-- tools/testing/selftests/kvm/include/kvm_util.h | 2 + .../selftests/kvm/include/x86_64/processor.h | 24 +++++ .../testing/selftests/kvm/lib/aarch64/processor.c | 4 + tools/testing/selftests/kvm/lib/kvm_util.c | 3 + .../testing/selftests/kvm/lib/kvm_util_internal.h | 2 + tools/testing/selftests/kvm/lib/s390x/processor.c | 4 + tools/testing/selftests/kvm/lib/x86_64/handlers.S | 81 +++++++++++++++ tools/testing/selftests/kvm/lib/x86_64/processor.c | 114 ++++++++++++++++++++- 9 files changed, 244 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/x86_64/handlers.S (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 30afbad36cd5..b0f1fcab79f5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -34,7 +34,7 @@ ifeq ($(ARCH),s390) endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c @@ -111,14 +111,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option) include ../lib.mk STATIC_LIBS := $(OUTPUT)/libkvm.a -LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) -EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.* +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.* + +x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) -$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) +$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS) $(AR) crs $@ $^ x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 919e161dd289..356930690bea 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -294,6 +294,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); memcpy(&(g), _p, sizeof(g)); \ }) +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); + /* Common ucalls */ enum { UCALL_NONE, diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0a65e7bb5249..02530dc6339b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -36,6 +36,8 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) +#define UNEXPECTED_VECTOR_PORT 0xfff0u + /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; @@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void) return idt; } +static inline void outl(uint16_t port, uint32_t value) +{ + __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -338,6 +345,23 @@ uint32_t kvm_get_cpuid_max_basic(void); uint32_t kvm_get_cpuid_max_extended(void); void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); +struct ex_regs { + uint64_t rax, rcx, rdx, rbx; + uint64_t rbp, rsi, rdi; + uint64_t r8, r9, r10, r11; + uint64_t r12, r13, r14, r15; + uint64_t vector; + uint64_t error_code; + uint64_t rip; + uint64_t cs; + uint64_t rflags; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 2afa6618b396..d6c32c328e9a 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 3327cebc1095..be230f3728d5 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1204,6 +1204,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) do { rc = ioctl(vcpu->fd, KVM_RUN, NULL); } while (rc == -1 && errno == EINTR); + + assert_on_unhandled_exception(vm, vcpuid); + return rc; } diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 2ef446520748..f07d383d03a1 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -50,6 +50,8 @@ struct kvm_vm { vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; + vm_vaddr_t idt; + vm_vaddr_t handlers; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index a88c5d665725..7349bb2e1a24 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S new file mode 100644 index 000000000000..aaf7bc7d2ce1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -0,0 +1,81 @@ +handle_exception: + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + + push %rdi + push %rsi + push %rbp + push %rbx + push %rdx + push %rcx + push %rax + mov %rsp, %rdi + + call route_exception + + pop %rax + pop %rcx + pop %rdx + pop %rbx + pop %rbp + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + /* Discard vector and error code. */ + add $16, %rsp + iretq + +/* + * Build the handle_exception wrappers which push the vector/error code on the + * stack and an array of pointers to those wrappers. + */ +.pushsection .rodata +.globl idt_handlers +idt_handlers: +.popsection + +.macro HANDLERS has_error from to + vector = \from + .rept \to - \from + 1 + .align 8 + + /* Fetch current address and append it to idt_handlers. */ + current_handler = . +.pushsection .rodata +.quad current_handler +.popsection + + .if ! \has_error + pushq $0 + .endif + pushq $vector + jmp handle_exception + vector = vector + 1 + .endr +.endm + +.global idt_handler_code +idt_handler_code: + HANDLERS has_error=0 from=0 to=7 + HANDLERS has_error=1 from=8 to=8 + HANDLERS has_error=0 from=9 to=9 + HANDLERS has_error=1 from=10 to=14 + HANDLERS has_error=0 from=15 to=16 + HANDLERS has_error=1 from=17 to=17 + HANDLERS has_error=0 from=18 to=255 + +.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 1ccf6c9b3476..66228297ac03 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -12,9 +12,18 @@ #include "../kvm_util_internal.h" #include "processor.h" +#ifndef NUM_INTERRUPTS +#define NUM_INTERRUPTS 256 +#endif + +#define DEFAULT_CODE_SELECTOR 0x8 +#define DEFAULT_DATA_SELECTOR 0x10 + /* Minimum physical address used for virtual translation tables. */ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 +vm_vaddr_t exception_handlers; + /* Virtual translation table structure declarations */ struct pageMapL4Entry { uint64_t present:1; @@ -557,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); + kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); break; @@ -1119,3 +1128,102 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) *va_bits = (entry->eax >> 8) & 0xff; } } + +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +void kvm_exit_unexpected_vector(uint32_t value) +{ + outl(UNEXPECTED_VECTOR_PORT, value); +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; + } + + kvm_exit_unexpected_vector(regs->vector); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + int i; + + vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0); + vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0); + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, + DEFAULT_CODE_SELECTOR); +} + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.idt.base = vm->idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->gdt; + sregs.gdt.limit = getpagesize() - 1; + kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); + vcpu_sregs_set(vm, vcpuid, &sregs); + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ + if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO + && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT + && vcpu_state(vm, vcpuid)->io.size == 4) { + /* Grab pointer to io data */ + uint32_t *data = (void *)vcpu_state(vm, vcpuid) + + vcpu_state(vm, vcpuid)->io.data_offset; + + TEST_ASSERT(false, + "Unexpected vectored event in guest (vector:0x%x)", + *data); + } +} -- cgit From ac4a4d6de22e674cd6e3fe57199a15383496aad2 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 27 Oct 2020 16:10:44 -0700 Subject: selftests: kvm: test enforcement of paravirtual cpuid features Add a set of tests that ensure the guest cannot access paravirtual msrs and hypercalls that have been disabled in the KVM_CPUID_FEATURES leaf. Expect a #GP in the case of msr accesses and -KVM_ENOSYS from hypercalls. Cc: Jim Mattson Signed-off-by: Oliver Upton Reviewed-by: Peter Shier Reviewed-by: Aaron Lewis Message-Id: <20201027231044.655110-7-oupton@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/include/kvm_util.h | 3 + .../selftests/kvm/include/x86_64/processor.h | 12 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 28 +++ tools/testing/selftests/kvm/lib/x86_64/processor.c | 29 +++ tools/testing/selftests/kvm/x86_64/kvm_pv_test.c | 234 +++++++++++++++++++++ 7 files changed, 308 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d2c2d6205008..22973aa75eda 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,6 +5,7 @@ /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/kvm_pv_test /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b0f1fcab79f5..44ca6badb544 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -41,6 +41,7 @@ LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 356930690bea..77f53dbc34ff 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -63,6 +63,9 @@ enum vm_mem_backing_src_type { int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap); +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 02530dc6339b..8e61340b3911 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -362,6 +362,18 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); void vm_handle_exception(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +/* + * set_cpuid() - overwrites a matching cpuid entry with the provided value. + * matches based on ent->function && ent->index. returns true + * if a match was found and successfully overwritten. + * @cpuid: the kvm cpuid list to modify. + * @ent: cpuid entry to insert + */ +bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index be230f3728d5..91e547031d8c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -86,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } +/* VCPU Enable Capability + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - VCPU + * cap - Capability + * + * Output Args: None + * + * Return: On success, 0. On failure a TEST_ASSERT failure is produced. + * + * Enables a capability (KVM_CAP_*) on the VCPU. + */ +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpu_id); + int r; + + TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id); + + r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap); + TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n" + " rc: %i, errno: %i", r, errno); + + return r; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 66228297ac03..d10c5c05bdf0 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1227,3 +1227,32 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) *data); } } + +bool set_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 *ent) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function != ent->function || cur->index != ent->index) + continue; + + memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2)); + return true; + } + + return false; +} + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3) +{ + uint64_t r; + + asm volatile("vmcall" + : "=a"(r) + : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + return r; +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c new file mode 100644 index 000000000000..b10a27485bad --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +extern unsigned char rdmsr_start; +extern unsigned char rdmsr_end; + +static u64 do_rdmsr(u32 idx) +{ + u32 lo, hi; + + asm volatile("rdmsr_start: rdmsr;" + "rdmsr_end:" + : "=a"(lo), "=c"(hi) + : "c"(idx)); + + return (((u64) hi) << 32) | lo; +} + +extern unsigned char wrmsr_start; +extern unsigned char wrmsr_end; + +static void do_wrmsr(u32 idx, u64 val) +{ + u32 lo, hi; + + lo = val; + hi = val >> 32; + + asm volatile("wrmsr_start: wrmsr;" + "wrmsr_end:" + : : "a"(lo), "c"(idx), "d"(hi)); +} + +static int nr_gp; + +static void guest_gp_handler(struct ex_regs *regs) +{ + unsigned char *rip = (unsigned char *)regs->rip; + bool r, w; + + r = rip == &rdmsr_start; + w = rip == &wrmsr_start; + GUEST_ASSERT(r || w); + + nr_gp++; + + if (r) + regs->rip = (uint64_t)&rdmsr_end; + else + regs->rip = (uint64_t)&wrmsr_end; +} + +struct msr_data { + uint32_t idx; + const char *name; +}; + +#define TEST_MSR(msr) { .idx = msr, .name = #msr } +#define UCALL_PR_MSR 0xdeadbeef +#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr) + +/* + * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or + * written, as the KVM_CPUID_FEATURES leaf is cleared. + */ +static struct msr_data msrs_to_test[] = { + TEST_MSR(MSR_KVM_SYSTEM_TIME), + TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW), + TEST_MSR(MSR_KVM_WALL_CLOCK), + TEST_MSR(MSR_KVM_WALL_CLOCK_NEW), + TEST_MSR(MSR_KVM_ASYNC_PF_EN), + TEST_MSR(MSR_KVM_STEAL_TIME), + TEST_MSR(MSR_KVM_PV_EOI_EN), + TEST_MSR(MSR_KVM_POLL_CONTROL), + TEST_MSR(MSR_KVM_ASYNC_PF_INT), + TEST_MSR(MSR_KVM_ASYNC_PF_ACK), +}; + +static void test_msr(struct msr_data *msr) +{ + PR_MSR(msr); + do_rdmsr(msr->idx); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + + nr_gp = 0; + do_wrmsr(msr->idx, 0); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + nr_gp = 0; +} + +struct hcall_data { + uint64_t nr; + const char *name; +}; + +#define TEST_HCALL(hc) { .nr = hc, .name = #hc } +#define UCALL_PR_HCALL 0xdeadc0de +#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc) + +/* + * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding + * features have been cleared in KVM_CPUID_FEATURES. + */ +static struct hcall_data hcalls_to_test[] = { + TEST_HCALL(KVM_HC_KICK_CPU), + TEST_HCALL(KVM_HC_SEND_IPI), + TEST_HCALL(KVM_HC_SCHED_YIELD), +}; + +static void test_hcall(struct hcall_data *hc) +{ + uint64_t r; + + PR_HCALL(hc); + r = kvm_hypercall(hc->nr, 0, 0, 0, 0); + GUEST_ASSERT(r == -KVM_ENOSYS); +} + +static void guest_main(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { + test_msr(&msrs_to_test[i]); + } + + for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) { + test_hcall(&hcalls_to_test[i]); + } + + GUEST_DONE(); +} + +static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid) +{ + struct kvm_cpuid_entry2 ent = {0}; + + ent.function = KVM_CPUID_FEATURES; + TEST_ASSERT(set_cpuid(cpuid, &ent), + "failed to clear KVM_CPUID_FEATURES leaf"); +} + +static void pr_msr(struct ucall *uc) +{ + struct msr_data *msr = (struct msr_data *)uc->args[0]; + + pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx); +} + +static void pr_hcall(struct ucall *uc) +{ + struct hcall_data *hc = (struct hcall_data *)uc->args[0]; + + pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); +} + +static void handle_abort(struct ucall *uc) +{ + TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], + __FILE__, uc->args[1]); +} + +#define VCPU_ID 0 + +static void enter_guest(struct kvm_vm *vm) +{ + struct kvm_run *run; + struct ucall uc; + int r; + + run = vcpu_state(vm, VCPU_ID); + + while (true) { + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_PR_MSR: + pr_msr(&uc); + break; + case UCALL_PR_HCALL: + pr_hcall(&uc); + break; + case UCALL_ABORT: + handle_abort(&uc); + return; + case UCALL_DONE: + return; + } + } +} + +int main(void) +{ + struct kvm_enable_cap cap = {0}; + struct kvm_cpuid2 *best; + struct kvm_vm *vm; + + if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { + pr_info("will skip kvm paravirt restriction tests.\n"); + return 0; + } + + vm = vm_create_default(VCPU_ID, 0, guest_main); + + cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID; + cap.args[0] = 1; + vcpu_enable_cap(vm, VCPU_ID, &cap); + + best = kvm_get_supported_cpuid(); + clear_kvm_cpuid_features(best); + vcpu_set_cpuid(vm, VCPU_ID, best); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + enter_guest(vm); + kvm_vm_free(vm); +} -- cgit From fd02029a9e019e941835e110651486e2d77d3f84 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 29 Oct 2020 21:17:01 +0100 Subject: KVM: selftests: Add aarch64 get-reg-list test Check for KVM_GET_REG_LIST regressions. The blessed list was created by running on v4.15 with the --core-reg-fixup option. The following script was also used in order to annotate system registers with their names when possible. When new system registers are added the names can just be added manually using the same grep. while read reg; do if [[ ! $reg =~ ARM64_SYS_REG ]]; then printf "\t$reg\n" continue fi encoding=$(echo "$reg" | sed "s/ARM64_SYS_REG(//;s/),//") if ! name=$(grep "$encoding" ../../../../arch/arm64/include/asm/sysreg.h); then printf "\t$reg\n" continue fi name=$(echo "$name" | sed "s/.*SYS_//;s/[\t ]*sys_reg($encoding)$//") printf "\t$reg\t/* $name */\n" done < <(aarch64/get-reg-list --core-reg-fixup --list) Signed-off-by: Andrew Jones Message-Id: <20201029201703.102716-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/aarch64/get-reg-list.c | 671 +++++++++++++++++++++ tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 29 + 5 files changed, 703 insertions(+) create mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 22973aa75eda..2034765862a2 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/get-reg-list /s390x/memop /s390x/resets /s390x/sync_regs_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 44ca6badb544..771455ae4a1b 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c new file mode 100644 index 000000000000..3ff097f6886e --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -0,0 +1,671 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * Note, the blessed list should be created from the oldest possible + * kernel. We can't go older than v4.15, though, because that's the first + * release to expose the ID system registers in KVM_GET_REG_LIST, see + * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features + * from guests"). Also, one must use the --core-reg-fixup command line + * option when running on an older kernel that doesn't include df205b5c6328 + * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + */ +#include +#include +#include +#include "kvm_util.h" +#include "test_util.h" + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + + +static struct kvm_reg_list *reg_list; + +static __u64 blessed_reg[]; +static __u64 blessed_n; + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +static const char *str_with_index(const char *template, __u64 index) +{ + char *str, *p; + int n; + + str = strdup(template); + p = strstr(str, "##"); + n = sprintf(p, "%lld", index); + strcat(p + n, strstr(template, "##") + 2); + + return (const char *)str; +} + +#define CORE_REGS_XX_NR_WORDS 2 +#define CORE_SPSR_XX_NR_WORDS 2 +#define CORE_FPREGS_XX_NR_WORDS 4 + +static const char *core_id_to_str(__u64 id) +{ + __u64 core_off = id & ~REG_MASK, idx; + + /* + * core_off is the offset into struct kvm_regs + */ + switch (core_off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; + TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + case KVM_REG_ARM_CORE_REG(regs.sp): + return "KVM_REG_ARM_CORE_REG(regs.sp)"; + case KVM_REG_ARM_CORE_REG(regs.pc): + return "KVM_REG_ARM_CORE_REG(regs.pc)"; + case KVM_REG_ARM_CORE_REG(regs.pstate): + return "KVM_REG_ARM_CORE_REG(regs.pstate)"; + case KVM_REG_ARM_CORE_REG(sp_el1): + return "KVM_REG_ARM_CORE_REG(sp_el1)"; + case KVM_REG_ARM_CORE_REG(elr_el1): + return "KVM_REG_ARM_CORE_REG(elr_el1)"; + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; + TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; + TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; + } + + TEST_FAIL("Unknown core reg id: 0x%llx", id); + return NULL; +} + +static void print_reg(__u64 id) +{ + unsigned op0, op1, crn, crm, op2; + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, + "KVM_REG_ARM64 missing in reg id: 0x%llx", id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U8: + reg_size = "KVM_REG_SIZE_U8"; + break; + case KVM_REG_SIZE_U16: + reg_size = "KVM_REG_SIZE_U16"; + break; + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; + case KVM_REG_SIZE_U512: + reg_size = "KVM_REG_SIZE_U512"; + break; + case KVM_REG_SIZE_U1024: + reg_size = "KVM_REG_SIZE_U1024"; + break; + case KVM_REG_SIZE_U2048: + reg_size = "KVM_REG_SIZE_U2048"; + break; + default: + TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id)); + break; + case KVM_REG_ARM_DEMUX: + TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), + "Unexpected bits set in DEMUX reg id: 0x%llx", id); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", + reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); + break; + case KVM_REG_ARM64_SYSREG: + op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT; + op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT; + crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT; + crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; + op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; + TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), + "Unexpected bits set in SYSREG reg id: 0x%llx", id); + printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); + break; + case KVM_REG_ARM_FW: + TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), + "Unexpected bits set in FW reg id: 0x%llx", id); + printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); + break; + case KVM_REG_ARM64_SVE: + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + break; + default: + TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + } +} + +/* + * Older kernels listed each 32-bit word of CORE registers separately. + * For 64 and 128-bit registers we need to ignore the extra words. We + * also need to fixup the sizes, because the older kernels stated all + * registers were 64-bit, even when they weren't. + */ +static void core_reg_fixup(void) +{ + struct kvm_reg_list *tmp; + __u64 id, core_off; + int i; + + tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); + + for (i = 0; i < reg_list->n; ++i) { + id = reg_list->reg[i]; + + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { + tmp->reg[tmp->n++] = id; + continue; + } + + core_off = id & ~REG_MASK; + + switch (core_off) { + case 0x52: case 0xd2: case 0xd6: + /* + * These offsets are pointing at padding. + * We need to ignore them too. + */ + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + if (core_off & 3) + continue; + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U128; + tmp->reg[tmp->n++] = id; + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U32; + tmp->reg[tmp->n++] = id; + continue; + default: + if (core_off & 1) + continue; + tmp->reg[tmp->n++] = id; + break; + } + } + + free(reg_list); + reg_list = tmp; +} + +int main(int ac, char **av) +{ + int new_regs = 0, missing_regs = 0, i; + int failed_get = 0, failed_set = 0; + bool print_list = false, fixup_core_regs = false; + struct kvm_vm *vm; + + for (i = 1; i < ac; ++i) { + if (strcmp(av[i], "--core-reg-fixup") == 0) + fixup_core_regs = true; + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else + fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + } + + vm = vm_create_default(0, 0, NULL); + reg_list = vcpu_get_reg_list(vm, 0); + + if (fixup_core_regs) + core_reg_fixup(); + + if (print_list) { + putchar('\n'); + for_each_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + return 0; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + */ + for_each_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + int ret; + + ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); + if (ret) { + puts("Failed to get "); + print_reg(reg.id); + putchar('\n'); + ++failed_get; + } + + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret) { + puts("Failed to set "); + print_reg(reg.id); + putchar('\n'); + ++failed_set; + } + } + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + printf("Number blessed registers: %5lld\n", blessed_n); + printf("Number registers: %5lld\n", reg_list->n); + } + + if (new_regs) { + printf("\nThere are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", new_regs); + for_each_new_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\nThere are %d missing registers.\n" + "The following lines are missing registers:\n\n", missing_regs); + for_each_missing_reg(i) + print_reg(blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set, + "There are %d missing registers; %d registers failed get; %d registers failed set", + missing_regs, failed_get, failed_set); + + return 0; +} + +/* + * The current blessed list was primed with the output of kernel version + * v4.15 with --core-reg-fixup and then later updated with new registers. + */ +static __u64 blessed_reg[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), + KVM_REG_ARM_FW_REG(0), + KVM_REG_ARM_FW_REG(1), + KVM_REG_ARM_FW_REG(2), + ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 2), + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */ + ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */ + ARM64_SYS_REG(2, 0, 0, 0, 4), + ARM64_SYS_REG(2, 0, 0, 0, 5), + ARM64_SYS_REG(2, 0, 0, 0, 6), + ARM64_SYS_REG(2, 0, 0, 0, 7), + ARM64_SYS_REG(2, 0, 0, 1, 4), + ARM64_SYS_REG(2, 0, 0, 1, 5), + ARM64_SYS_REG(2, 0, 0, 1, 6), + ARM64_SYS_REG(2, 0, 0, 1, 7), + ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 4), + ARM64_SYS_REG(2, 0, 0, 2, 5), + ARM64_SYS_REG(2, 0, 0, 2, 6), + ARM64_SYS_REG(2, 0, 0, 2, 7), + ARM64_SYS_REG(2, 0, 0, 3, 4), + ARM64_SYS_REG(2, 0, 0, 3, 5), + ARM64_SYS_REG(2, 0, 0, 3, 6), + ARM64_SYS_REG(2, 0, 0, 3, 7), + ARM64_SYS_REG(2, 0, 0, 4, 4), + ARM64_SYS_REG(2, 0, 0, 4, 5), + ARM64_SYS_REG(2, 0, 0, 4, 6), + ARM64_SYS_REG(2, 0, 0, 4, 7), + ARM64_SYS_REG(2, 0, 0, 5, 4), + ARM64_SYS_REG(2, 0, 0, 5, 5), + ARM64_SYS_REG(2, 0, 0, 5, 6), + ARM64_SYS_REG(2, 0, 0, 5, 7), + ARM64_SYS_REG(2, 0, 0, 6, 4), + ARM64_SYS_REG(2, 0, 0, 6, 5), + ARM64_SYS_REG(2, 0, 0, 6, 6), + ARM64_SYS_REG(2, 0, 0, 6, 7), + ARM64_SYS_REG(2, 0, 0, 7, 4), + ARM64_SYS_REG(2, 0, 0, 7, 5), + ARM64_SYS_REG(2, 0, 0, 7, 6), + ARM64_SYS_REG(2, 0, 0, 7, 7), + ARM64_SYS_REG(2, 0, 0, 8, 4), + ARM64_SYS_REG(2, 0, 0, 8, 5), + ARM64_SYS_REG(2, 0, 0, 8, 6), + ARM64_SYS_REG(2, 0, 0, 8, 7), + ARM64_SYS_REG(2, 0, 0, 9, 4), + ARM64_SYS_REG(2, 0, 0, 9, 5), + ARM64_SYS_REG(2, 0, 0, 9, 6), + ARM64_SYS_REG(2, 0, 0, 9, 7), + ARM64_SYS_REG(2, 0, 0, 10, 4), + ARM64_SYS_REG(2, 0, 0, 10, 5), + ARM64_SYS_REG(2, 0, 0, 10, 6), + ARM64_SYS_REG(2, 0, 0, 10, 7), + ARM64_SYS_REG(2, 0, 0, 11, 4), + ARM64_SYS_REG(2, 0, 0, 11, 5), + ARM64_SYS_REG(2, 0, 0, 11, 6), + ARM64_SYS_REG(2, 0, 0, 11, 7), + ARM64_SYS_REG(2, 0, 0, 12, 4), + ARM64_SYS_REG(2, 0, 0, 12, 5), + ARM64_SYS_REG(2, 0, 0, 12, 6), + ARM64_SYS_REG(2, 0, 0, 12, 7), + ARM64_SYS_REG(2, 0, 0, 13, 4), + ARM64_SYS_REG(2, 0, 0, 13, 5), + ARM64_SYS_REG(2, 0, 0, 13, 6), + ARM64_SYS_REG(2, 0, 0, 13, 7), + ARM64_SYS_REG(2, 0, 0, 14, 4), + ARM64_SYS_REG(2, 0, 0, 14, 5), + ARM64_SYS_REG(2, 0, 0, 14, 6), + ARM64_SYS_REG(2, 0, 0, 14, 7), + ARM64_SYS_REG(2, 0, 0, 15, 4), + ARM64_SYS_REG(2, 0, 0, 15, 5), + ARM64_SYS_REG(2, 0, 0, 15, 6), + ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ + ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 3), + ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 7), + ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 2), + ARM64_SYS_REG(3, 0, 0, 4, 3), + ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 5), + ARM64_SYS_REG(3, 0, 0, 4, 6), + ARM64_SYS_REG(3, 0, 0, 4, 7), + ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 2), + ARM64_SYS_REG(3, 0, 0, 5, 3), + ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 6), + ARM64_SYS_REG(3, 0, 0, 5, 7), + ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 2), + ARM64_SYS_REG(3, 0, 0, 6, 3), + ARM64_SYS_REG(3, 0, 0, 6, 4), + ARM64_SYS_REG(3, 0, 0, 6, 5), + ARM64_SYS_REG(3, 0, 0, 6, 6), + ARM64_SYS_REG(3, 0, 0, 6, 7), + ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), + ARM64_SYS_REG(3, 0, 0, 7, 4), + ARM64_SYS_REG(3, 0, 0, 7, 5), + ARM64_SYS_REG(3, 0, 0, 7, 6), + ARM64_SYS_REG(3, 0, 0, 7, 7), + ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ + ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ + ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ + ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ + ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 3, 14, 8, 0), + ARM64_SYS_REG(3, 3, 14, 8, 1), + ARM64_SYS_REG(3, 3, 14, 8, 2), + ARM64_SYS_REG(3, 3, 14, 8, 3), + ARM64_SYS_REG(3, 3, 14, 8, 4), + ARM64_SYS_REG(3, 3, 14, 8, 5), + ARM64_SYS_REG(3, 3, 14, 8, 6), + ARM64_SYS_REG(3, 3, 14, 8, 7), + ARM64_SYS_REG(3, 3, 14, 9, 0), + ARM64_SYS_REG(3, 3, 14, 9, 1), + ARM64_SYS_REG(3, 3, 14, 9, 2), + ARM64_SYS_REG(3, 3, 14, 9, 3), + ARM64_SYS_REG(3, 3, 14, 9, 4), + ARM64_SYS_REG(3, 3, 14, 9, 5), + ARM64_SYS_REG(3, 3, 14, 9, 6), + ARM64_SYS_REG(3, 3, 14, 9, 7), + ARM64_SYS_REG(3, 3, 14, 10, 0), + ARM64_SYS_REG(3, 3, 14, 10, 1), + ARM64_SYS_REG(3, 3, 14, 10, 2), + ARM64_SYS_REG(3, 3, 14, 10, 3), + ARM64_SYS_REG(3, 3, 14, 10, 4), + ARM64_SYS_REG(3, 3, 14, 10, 5), + ARM64_SYS_REG(3, 3, 14, 10, 6), + ARM64_SYS_REG(3, 3, 14, 10, 7), + ARM64_SYS_REG(3, 3, 14, 11, 0), + ARM64_SYS_REG(3, 3, 14, 11, 1), + ARM64_SYS_REG(3, 3, 14, 11, 2), + ARM64_SYS_REG(3, 3, 14, 11, 3), + ARM64_SYS_REG(3, 3, 14, 11, 4), + ARM64_SYS_REG(3, 3, 14, 11, 5), + ARM64_SYS_REG(3, 3, 14, 11, 6), + ARM64_SYS_REG(3, 3, 14, 12, 0), + ARM64_SYS_REG(3, 3, 14, 12, 1), + ARM64_SYS_REG(3, 3, 14, 12, 2), + ARM64_SYS_REG(3, 3, 14, 12, 3), + ARM64_SYS_REG(3, 3, 14, 12, 4), + ARM64_SYS_REG(3, 3, 14, 12, 5), + ARM64_SYS_REG(3, 3, 14, 12, 6), + ARM64_SYS_REG(3, 3, 14, 12, 7), + ARM64_SYS_REG(3, 3, 14, 13, 0), + ARM64_SYS_REG(3, 3, 14, 13, 1), + ARM64_SYS_REG(3, 3, 14, 13, 2), + ARM64_SYS_REG(3, 3, 14, 13, 3), + ARM64_SYS_REG(3, 3, 14, 13, 4), + ARM64_SYS_REG(3, 3, 14, 13, 5), + ARM64_SYS_REG(3, 3, 14, 13, 6), + ARM64_SYS_REG(3, 3, 14, 13, 7), + ARM64_SYS_REG(3, 3, 14, 14, 0), + ARM64_SYS_REG(3, 3, 14, 14, 1), + ARM64_SYS_REG(3, 3, 14, 14, 2), + ARM64_SYS_REG(3, 3, 14, 14, 3), + ARM64_SYS_REG(3, 3, 14, 14, 4), + ARM64_SYS_REG(3, 3, 14, 14, 5), + ARM64_SYS_REG(3, 3, 14, 14, 6), + ARM64_SYS_REG(3, 3, 14, 14, 7), + ARM64_SYS_REG(3, 3, 14, 15, 0), + ARM64_SYS_REG(3, 3, 14, 15, 1), + ARM64_SYS_REG(3, 3, 14, 15, 2), + ARM64_SYS_REG(3, 3, 14, 15, 3), + ARM64_SYS_REG(3, 3, 14, 15, 4), + ARM64_SYS_REG(3, 3, 14, 15, 5), + ARM64_SYS_REG(3, 3, 14, 15, 6), + ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, +}; +static __u64 blessed_n = ARRAY_SIZE(blessed_reg); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 77f53dbc34ff..feb949a92055 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -152,6 +152,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_guest_debug *debug); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 91e547031d8c..7669cb7c86e3 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1291,6 +1291,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, "rc: %i errno: %i", ret, errno); } +/* + * VM VCPU Get Reg List + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * None + * + * Return: + * A pointer to an allocated struct kvm_reg_list + * + * Get the list of guest registers which are supported for + * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls + */ +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, ®_list_n); + TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); + reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); + reg_list->n = reg_list_n.n; + vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list); + return reg_list; +} + /* * VM VCPU Regs Get * -- cgit From 31d212959179015bc07f3af4e890cadd26e01ee0 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 29 Oct 2020 21:17:03 +0100 Subject: KVM: selftests: Add blessed SVE registers to get-reg-list Add support for the SVE registers to get-reg-list and create a new test, get-reg-list-sve, which tests them when running on a machine with SVE support. Signed-off-by: Andrew Jones Message-Id: <20201029201703.102716-5-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/get-reg-list-sve.c | 3 + tools/testing/selftests/kvm/aarch64/get-reg-list.c | 254 +++++++++++++++++---- 4 files changed, 217 insertions(+), 42 deletions(-) create mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2034765862a2..ea1692d43411 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only /aarch64/get-reg-list +/aarch64/get-reg-list-sve /s390x/memop /s390x/resets /s390x/sync_regs_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 771455ae4a1b..2b0d7d325e2a 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c new file mode 100644 index 000000000000..efba76682b4b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define REG_LIST_SVE +#include "get-reg-list.c" diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 3ff097f6886e..33218a395d9f 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -29,6 +29,13 @@ #include #include "kvm_util.h" #include "test_util.h" +#include "processor.h" + +#ifdef REG_LIST_SVE +#define reg_list_sve() (true) +#else +#define reg_list_sve() (false) +#endif #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) @@ -46,8 +53,9 @@ static struct kvm_reg_list *reg_list; -static __u64 blessed_reg[]; -static __u64 blessed_n; +static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; +static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; +static __u64 *blessed_reg, blessed_n; static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) { @@ -119,6 +127,40 @@ static const char *core_id_to_str(__u64 id) return NULL; } +static const char *sve_id_to_str(__u64 id) +{ + __u64 sve_off, n, i; + + if (id == KVM_REG_ARM64_SVE_VLS) + return "KVM_REG_ARM64_SVE_VLS"; + + sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); + i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); + + TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id); + + switch (sve_off) { + case KVM_REG_ARM64_SVE_ZREG_BASE ... + KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), + "Unexpected bits set in SVE ZREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + case KVM_REG_ARM64_SVE_PREG_BASE ... + KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), + "Unexpected bits set in SVE PREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + case KVM_REG_ARM64_SVE_FFR_BASE: + TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), + "Unexpected bits set in SVE FFR id: 0x%llx", id); + return "KVM_REG_ARM64_SVE_FFR(0)"; + } + + return NULL; +} + static void print_reg(__u64 id) { unsigned op0, op1, crn, crm, op2; @@ -186,7 +228,10 @@ static void print_reg(__u64 id) printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM64_SVE: - TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + if (reg_list_sve()) + printf("\t%s,\n", sve_id_to_str(id)); + else + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); break; default: TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", @@ -251,12 +296,40 @@ static void core_reg_fixup(void) reg_list = tmp; } +static void prepare_vcpu_init(struct kvm_vcpu_init *init) +{ + if (reg_list_sve()) + init->features[0] |= 1 << KVM_ARM_VCPU_SVE; +} + +static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + int feature; + + if (reg_list_sve()) { + feature = KVM_ARM_VCPU_SVE; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + } +} + +static void check_supported(void) +{ + if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) { + fprintf(stderr, "SVE not available, skipping tests\n"); + exit(KSFT_SKIP); + } +} + int main(int ac, char **av) { + struct kvm_vcpu_init init = { .target = -1, }; int new_regs = 0, missing_regs = 0, i; - int failed_get = 0, failed_set = 0; + int failed_get = 0, failed_set = 0, failed_reject = 0; bool print_list = false, fixup_core_regs = false; struct kvm_vm *vm; + __u64 *vec_regs; + + check_supported(); for (i = 1; i < ac; ++i) { if (strcmp(av[i], "--core-reg-fixup") == 0) @@ -267,7 +340,11 @@ int main(int ac, char **av) fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); } - vm = vm_create_default(0, 0, NULL); + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + prepare_vcpu_init(&init); + aarch64_vcpu_add_default(vm, 0, &init, NULL); + finalize_vcpu(vm, 0); + reg_list = vcpu_get_reg_list(vm, 0); if (fixup_core_regs) @@ -307,6 +384,18 @@ int main(int ac, char **av) ++failed_get; } + /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ + if (find_reg(rejects_set, rejects_set_n, reg.id)) { + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret != -1 || errno != EPERM) { + printf("Failed to reject (ret=%d, errno=%d) ", ret, errno); + print_reg(reg.id); + putchar('\n'); + ++failed_reject; + } + continue; + } + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); if (ret) { puts("Failed to set "); @@ -316,6 +405,20 @@ int main(int ac, char **av) } } + if (reg_list_sve()) { + blessed_n = base_regs_n + sve_regs_n; + vec_regs = sve_regs; + } else { + blessed_n = base_regs_n + vregs_n; + vec_regs = vregs; + } + + blessed_reg = calloc(blessed_n, sizeof(__u64)); + for (i = 0; i < base_regs_n; ++i) + blessed_reg[i] = base_regs[i]; + for (i = 0; i < blessed_n - base_regs_n; ++i) + blessed_reg[base_regs_n + i] = vec_regs[i]; + for_each_new_reg(i) ++new_regs; @@ -344,9 +447,10 @@ int main(int ac, char **av) putchar('\n'); } - TEST_ASSERT(!missing_regs && !failed_get && !failed_set, - "There are %d missing registers; %d registers failed get; %d registers failed set", - missing_regs, failed_get, failed_set); + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "There are %d missing registers; " + "%d registers failed get; %d registers failed set; %d registers failed reject", + missing_regs, failed_get, failed_set, failed_reject); return 0; } @@ -355,7 +459,7 @@ int main(int ac, char **av) * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. */ -static __u64 blessed_reg[] = { +static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), @@ -397,38 +501,6 @@ static __u64 blessed_reg[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), KVM_REG_ARM_FW_REG(0), @@ -668,4 +740,102 @@ static __u64 blessed_reg[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, }; -static __u64 blessed_n = ARRAY_SIZE(blessed_reg); +static __u64 base_regs_n = ARRAY_SIZE(base_regs); + +static __u64 vregs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), +}; +static __u64 vregs_n = ARRAY_SIZE(vregs); + +static __u64 sve_regs[] = { + KVM_REG_ARM64_SVE_VLS, + KVM_REG_ARM64_SVE_ZREG(0, 0), + KVM_REG_ARM64_SVE_ZREG(1, 0), + KVM_REG_ARM64_SVE_ZREG(2, 0), + KVM_REG_ARM64_SVE_ZREG(3, 0), + KVM_REG_ARM64_SVE_ZREG(4, 0), + KVM_REG_ARM64_SVE_ZREG(5, 0), + KVM_REG_ARM64_SVE_ZREG(6, 0), + KVM_REG_ARM64_SVE_ZREG(7, 0), + KVM_REG_ARM64_SVE_ZREG(8, 0), + KVM_REG_ARM64_SVE_ZREG(9, 0), + KVM_REG_ARM64_SVE_ZREG(10, 0), + KVM_REG_ARM64_SVE_ZREG(11, 0), + KVM_REG_ARM64_SVE_ZREG(12, 0), + KVM_REG_ARM64_SVE_ZREG(13, 0), + KVM_REG_ARM64_SVE_ZREG(14, 0), + KVM_REG_ARM64_SVE_ZREG(15, 0), + KVM_REG_ARM64_SVE_ZREG(16, 0), + KVM_REG_ARM64_SVE_ZREG(17, 0), + KVM_REG_ARM64_SVE_ZREG(18, 0), + KVM_REG_ARM64_SVE_ZREG(19, 0), + KVM_REG_ARM64_SVE_ZREG(20, 0), + KVM_REG_ARM64_SVE_ZREG(21, 0), + KVM_REG_ARM64_SVE_ZREG(22, 0), + KVM_REG_ARM64_SVE_ZREG(23, 0), + KVM_REG_ARM64_SVE_ZREG(24, 0), + KVM_REG_ARM64_SVE_ZREG(25, 0), + KVM_REG_ARM64_SVE_ZREG(26, 0), + KVM_REG_ARM64_SVE_ZREG(27, 0), + KVM_REG_ARM64_SVE_ZREG(28, 0), + KVM_REG_ARM64_SVE_ZREG(29, 0), + KVM_REG_ARM64_SVE_ZREG(30, 0), + KVM_REG_ARM64_SVE_ZREG(31, 0), + KVM_REG_ARM64_SVE_PREG(0, 0), + KVM_REG_ARM64_SVE_PREG(1, 0), + KVM_REG_ARM64_SVE_PREG(2, 0), + KVM_REG_ARM64_SVE_PREG(3, 0), + KVM_REG_ARM64_SVE_PREG(4, 0), + KVM_REG_ARM64_SVE_PREG(5, 0), + KVM_REG_ARM64_SVE_PREG(6, 0), + KVM_REG_ARM64_SVE_PREG(7, 0), + KVM_REG_ARM64_SVE_PREG(8, 0), + KVM_REG_ARM64_SVE_PREG(9, 0), + KVM_REG_ARM64_SVE_PREG(10, 0), + KVM_REG_ARM64_SVE_PREG(11, 0), + KVM_REG_ARM64_SVE_PREG(12, 0), + KVM_REG_ARM64_SVE_PREG(13, 0), + KVM_REG_ARM64_SVE_PREG(14, 0), + KVM_REG_ARM64_SVE_PREG(15, 0), + KVM_REG_ARM64_SVE_FFR(0), + ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ +}; +static __u64 sve_regs_n = ARRAY_SIZE(sve_regs); + +static __u64 rejects_set[] = { +#ifdef REG_LIST_SVE + KVM_REG_ARM64_SVE_VLS, +#endif +}; +static __u64 rejects_set_n = ARRAY_SIZE(rejects_set); -- cgit From 3031e0288e60f09533339e61117b83099a6e126e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:28 -0400 Subject: KVM: selftests: Always clear dirty bitmap after iteration We used not to clear the dirty bitmap before because KVM_GET_DIRTY_LOG would overwrite it the next time it copies the dirty log onto it. In the upcoming dirty ring tests we'll start to fetch dirty pages from a ring buffer, so no one is going to clear the dirty bitmap for us. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012228.5916-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 752ec158ac59..6a8275a22861 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -195,7 +195,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_bit_le(page, bmap)) { + if (test_and_clear_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto -- cgit From afdb1960071935cfd5c1908691a34cc6e36931f7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:33 -0400 Subject: KVM: selftests: Use a single binary for dirty/clear log test Remove the clear_dirty_log test, instead merge it into the existing dirty_log_test. It should be cleaner to use this single binary to do both tests, also it's a preparation for the upcoming dirty ring test. The default behavior will run all the modes in sequence. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012233.6013-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 - tools/testing/selftests/kvm/clear_dirty_log_test.c | 6 - tools/testing/selftests/kvm/dirty_log_test.c | 187 +++++++++++++++++---- 3 files changed, 156 insertions(+), 39 deletions(-) delete mode 100644 tools/testing/selftests/kvm/clear_dirty_log_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2b0d7d325e2a..c8af04dccf7f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -59,7 +59,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test -TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus @@ -68,7 +67,6 @@ TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve -TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c deleted file mode 100644 index 11672ec6f74e..000000000000 --- a/tools/testing/selftests/kvm/clear_dirty_log_test.c +++ /dev/null @@ -1,6 +0,0 @@ -#define USE_CLEAR_DIRTY_LOG -#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) -#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) -#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ - KVM_DIRTY_LOG_INITIALLY_SET) -#include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 6a8275a22861..139ccb550618 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -128,6 +128,78 @@ static uint64_t host_dirty_count; static uint64_t host_clear_count; static uint64_t host_track_next_count; +enum log_mode_t { + /* Only use KVM_GET_DIRTY_LOG for logging */ + LOG_MODE_DIRTY_LOG = 0, + + /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */ + LOG_MODE_CLEAR_LOG = 1, + + LOG_MODE_NUM, + + /* Run all supported modes */ + LOG_MODE_ALL = LOG_MODE_NUM, +}; + +/* Mode of logging to test. Default is to run all supported modes */ +static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; +/* Logging mode for current run */ +static enum log_mode_t host_log_mode; + +static bool clear_log_supported(void) +{ + return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); +} + +static void clear_log_create_vm_done(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = {}; + u64 manual_caps; + + manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!"); + manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = manual_caps; + vm_enable_cap(vm, &cap); +} + +static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); +} + +static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); + kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); +} + +struct log_mode { + const char *name; + /* Return true if this mode is supported, otherwise false */ + bool (*supported)(void); + /* Hook when the vm creation is done (before vcpu creation) */ + void (*create_vm_done)(struct kvm_vm *vm); + /* Hook to collect the dirty pages into the bitmap provided */ + void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages); +} log_modes[LOG_MODE_NUM] = { + { + .name = "dirty-log", + .collect_dirty_pages = dirty_log_collect_dirty_pages, + }, + { + .name = "clear-log", + .supported = clear_log_supported, + .create_vm_done = clear_log_create_vm_done, + .collect_dirty_pages = clear_log_collect_dirty_pages, + }, +}; + /* * We use this bitmap to track some pages that should have its dirty * bit set in the _next_ iteration. For example, if we detected the @@ -137,6 +209,44 @@ static uint64_t host_track_next_count; */ static unsigned long *host_bmap_track; +static void log_modes_dump(void) +{ + int i; + + printf("all"); + for (i = 0; i < LOG_MODE_NUM; i++) + printf(", %s", log_modes[i].name); + printf("\n"); +} + +static bool log_mode_supported(void) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->supported) + return mode->supported(); + + return true; +} + +static void log_mode_create_vm_done(struct kvm_vm *vm) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->create_vm_done) + mode->create_vm_done(vm); +} + +static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + TEST_ASSERT(mode->collect_dirty_pages != NULL, + "collect_dirty_pages() is required for any log mode!"); + mode->collect_dirty_pages(vm, slot, bitmap, num_pages); +} + static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -257,6 +367,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #ifdef __x86_64__ vm_create_irqchip(vm); #endif + log_mode_create_vm_done(vm); vm_vcpu_add_default(vm, vcpuid, guest_code); return vm; } @@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -#ifdef USE_CLEAR_DIRTY_LOG -static u64 dirty_log_manual_caps; -#endif - static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_vm *vm; unsigned long *bmap; + if (!log_mode_supported()) { + print_skip("Log mode '%s' not supported", + log_modes[host_log_mode].name); + return; + } + /* * We reserve page table for 2 times of extra dirty mem which * will definitely cover the original (1G+) test range. Here @@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); -#ifdef USE_CLEAR_DIRTY_LOG - struct kvm_enable_cap cap = {}; - - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); -#endif - /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_test_phys_mem, @@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, while (iteration < iterations) { /* Give the vcpu thread some time to dirty some pages */ usleep(interval * 1000); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); -#ifdef USE_CLEAR_DIRTY_LOG - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - host_num_pages); -#endif + log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, + bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); iteration++; sync_global_to_guest(vm, iteration); @@ -410,6 +512,9 @@ static void help(char *name) TEST_HOST_LOOP_INTERVAL); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -M: specify the host logging mode " + "(default: run all log modes). Supported modes: \n\t"); + log_modes_dump(); printf(" -m: specify the guest mode ID to test " "(default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -429,18 +534,7 @@ int main(int argc, char *argv[]) bool mode_selected = false; uint64_t phys_offset = 0; unsigned int mode; - int opt, i; - -#ifdef USE_CLEAR_DIRTY_LOG - dirty_log_manual_caps = - kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - if (!dirty_log_manual_caps) { - print_skip("KVM_CLEAR_DIRTY_LOG not available"); - exit(KSFT_SKIP); - } - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); -#endif + int opt, i, j; #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); @@ -464,7 +558,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { + while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) { switch (opt) { case 'i': iterations = strtol(optarg, NULL, 10); @@ -486,6 +580,26 @@ int main(int argc, char *argv[]) "Guest mode ID %d too big", mode); guest_modes[mode].enabled = true; break; + case 'M': + if (!strcmp(optarg, "all")) { + host_log_mode_option = LOG_MODE_ALL; + break; + } + for (i = 0; i < LOG_MODE_NUM; i++) { + if (!strcmp(optarg, log_modes[i].name)) { + pr_info("Setting log mode to: '%s'\n", + optarg); + host_log_mode_option = i; + break; + } + } + if (i == LOG_MODE_NUM) { + printf("Log mode '%s' invalid. Please choose " + "from: ", optarg); + log_modes_dump(); + exit(1); + } + break; case 'h': default: help(argv[0]); @@ -507,7 +621,18 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (j = 0; j < LOG_MODE_NUM; j++) { + pr_info("Testing Log Mode '%s'\n", + log_modes[j].name); + host_log_mode = j; + run_test(i, iterations, interval, phys_offset); + } + } else { + host_log_mode = host_log_mode_option; + run_test(i, iterations, interval, phys_offset); + } } return 0; -- cgit From 4b5d12b0e21cc9f9f00201819844fcafb020ffad Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:29 -0700 Subject: KVM: selftests: Factor code out of demand_paging_test Much of the code in demand_paging_test can be reused by other, similar multi-vCPU-memory-touching-perfromance-tests. Factor that common code out for reuse. No functional change expected. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 204 +++------------------ .../testing/selftests/kvm/include/perf_test_util.h | 187 +++++++++++++++++++ 2 files changed, 210 insertions(+), 181 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/perf_test_util.h (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 360cd3ea4cd6..4251e98ceb69 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -21,18 +21,12 @@ #include #include -#include "test_util.h" -#include "kvm_util.h" +#include "perf_test_util.h" #include "processor.h" +#include "test_util.h" #ifdef __NR_userfaultfd -/* The memory slot index demand page */ -#define TEST_MEM_SLOT_INDEX 1 - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - #define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ #ifdef PRINT_PER_PAGE_UPDATES @@ -47,75 +41,14 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif -#define MAX_VCPUS 512 - -/* - * Guest/Host shared variables. Ensure addr_gva2hva() and/or - * sync_global_to/from_guest() are used when accessing from - * the host. READ/WRITE_ONCE() should also be used with anything - * that may change. - */ -static uint64_t host_page_size; -static uint64_t guest_page_size; - static char *guest_data_prototype; -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; - -struct vcpu_args { - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - int vcpu_id; - struct kvm_vm *vm; -}; - -static struct vcpu_args vcpu_args[MAX_VCPUS]; - -/* - * Continuously write to the first 8 bytes of each page in the demand paging - * memory region. - */ -static void guest_code(uint32_t vcpu_id) -{ - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id); - - gva = vcpu_args[vcpu_id].gva; - pages = vcpu_args[vcpu_id].pages; - - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * guest_page_size); - - addr &= ~(host_page_size - 1); - *(uint64_t *)addr = 0x0123456789ABCDEF; - } - - GUEST_SYNC(1); -} - static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *args = (struct vcpu_args *)data; - struct kvm_vm *vm = args->vm; - int vcpu_id = args->vcpu_id; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; struct timespec start, end, ts_diff; @@ -141,39 +74,6 @@ static void *vcpu_worker(void *data) return NULL; } -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; - - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - vm = _vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - return vm; -} - static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; @@ -186,7 +86,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) copy.src = (uint64_t)guest_data_prototype; copy.dst = addr; - copy.len = host_page_size; + copy.len = perf_test_args.host_page_size; copy.mode = 0; clock_gettime(CLOCK_MONOTONIC, &start); @@ -203,7 +103,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, timespec_to_ns(timespec_sub(end, start))); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", - host_page_size, addr, tid); + perf_test_args.host_page_size, addr, tid); return 0; } @@ -360,64 +260,21 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, struct timespec start, end, ts_diff; int *pipefds = NULL; struct kvm_vm *vm; - uint64_t guest_num_pages; int vcpu_id; int r; vm = create_vm(mode, vcpus, vcpu_memory_bytes); - guest_page_size = vm_get_page_size(vm); - - TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - host_page_size = getpagesize(); - TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0, - "Guest memory size is not host page size aligned."); - - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - guest_page_size; - guest_test_phys_mem &= ~(host_page_size - 1); - -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing demand paging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - guest_data_prototype = malloc(host_page_size); + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); - memset(guest_data_prototype, 0xAB, host_page_size); + memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + add_vcpus(vm, vcpus, vcpu_memory_bytes); + if (use_uffd) { uffd_handler_threads = malloc(vcpus * sizeof(*uffd_handler_threads)); @@ -428,22 +285,18 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pipefds = malloc(sizeof(int) * vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - } - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vm_paddr_t vcpu_gpa; - void *vcpu_hva; - - vm_vcpu_add_default(vm, vcpu_id, guest_code); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vm_paddr_t vcpu_gpa; + void *vcpu_hva; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); - /* Cache the HVA pointer of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); + /* Cache the HVA pointer of the region */ + vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - if (use_uffd) { /* * Set up user fault fd to handle demand paging * requests. @@ -460,22 +313,10 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, if (r < 0) exit(-r); } - -#ifdef __x86_64__ - vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); -#endif - - vcpu_args[vcpu_id].vm = vm; - vcpu_args[vcpu_id].vcpu_id = vcpu_id; - vcpu_args[vcpu_id].gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size; } /* Export the shared variables to the guest */ - sync_global_to_guest(vm, host_page_size); - sync_global_to_guest(vm, guest_page_size); - sync_global_to_guest(vm, vcpu_args); + sync_global_to_guest(vm, perf_test_args); pr_info("Finished creating vCPUs and starting uffd threads\n"); @@ -483,7 +324,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &vcpu_args[vcpu_id]); + &perf_test_args.vcpu_args[vcpu_id]); } pr_info("Started all vCPUs\n"); @@ -514,7 +355,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + perf_test_args.vcpu_args[0].pages * vcpus / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h new file mode 100644 index 000000000000..f71f0858a1f2 --- /dev/null +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/perf_test_util.h + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H +#define SELFTEST_KVM_PERF_TEST_UTIL_H + +#include "kvm_util.h" +#include "processor.h" + +#define MAX_VCPUS 512 + +#define PAGE_SHIFT_4K 12 +#define PTES_PER_4K_PT 512 + +#define TEST_MEM_SLOT_INDEX 1 + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +struct vcpu_args { + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + int vcpu_id; +}; + +struct perf_test_args { + struct kvm_vm *vm; + uint64_t host_page_size; + uint64_t guest_page_size; + + struct vcpu_args vcpu_args[MAX_VCPUS]; +}; + +static struct perf_test_args perf_test_args; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + addr &= ~(perf_test_args.host_page_size - 1); + *(uint64_t *)addr = 0x0123456789ABCDEF; + } + + GUEST_SYNC(1); +} + +static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES; + uint64_t guest_num_pages; + + /* Account for a few pages per-vCPU for stacks */ + pages += DEFAULT_STACK_PGS * vcpus; + + /* + * Reserve twice the ammount of memory needed to map the test region and + * the page table / stacks region, at 4k, for page tables. Do the + * calculation with 4K page size: the smallest of all archs. (e.g., 64K + * page size guest will need even less memory for page tables). + */ + pages += (2 * pages) / PTES_PER_4K_PT; + pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / + PTES_PER_4K_PT; + pages = vm_adjust_num_guest_pages(mode, pages); + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + vm = _vm_create(mode, pages, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + + perf_test_args.vm = vm; + perf_test_args.guest_page_size = vm_get_page_size(vm); + perf_test_args.host_page_size = getpagesize(); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + guest_num_pages = (vcpus * vcpu_memory_bytes) / + perf_test_args.guest_page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vm_vcpu_add_default(vm, vcpu_id, guest_code); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); +#endif + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} + +#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ -- cgit From 2fe5149bdfbf3c2cdfafd2b5b496252d45ca1f78 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:30 -0700 Subject: KVM: selftests: Remove address rounding in guest code Rounding the address the guest writes to a host page boundary will only have an effect if the host page size is larger than the guest page size, but in that case the guest write would still go to the same host page. There's no reason to round the address down, so remove the rounding to simplify the demand paging test. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-3-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/perf_test_util.h | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index f71f0858a1f2..838f946700f0 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -72,7 +72,6 @@ static void guest_code(uint32_t vcpu_id) for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * perf_test_args.guest_page_size); - addr &= ~(perf_test_args.host_page_size - 1); *(uint64_t *)addr = 0x0123456789ABCDEF; } -- cgit From 1eafbd27edb5098ed6b6bc404c35d56c78beb0fd Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:31 -0700 Subject: KVM: selftests: Simplify demand_paging_test with timespec_diff_now Add a helper function to get the current time and return the time since a given start time. Use that function to simplify the timekeeping in the demand paging test. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-4-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 26 ++++++++++++------------ tools/testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 15 ++++++++++++-- 3 files changed, 27 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 4251e98ceb69..7de6feb00076 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -50,7 +50,8 @@ static void *vcpu_worker(void *data) int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); @@ -66,8 +67,7 @@ static void *vcpu_worker(void *data) exit_reason_str(run->exit_reason)); } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); @@ -78,7 +78,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; struct timespec start; - struct timespec end; + struct timespec ts_diff; struct uffdio_copy copy; int r; @@ -98,10 +98,10 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) return r; } - clock_gettime(CLOCK_MONOTONIC, &end); + ts_diff = timespec_diff_now(start); PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, - timespec_to_ns(timespec_sub(end, start))); + timespec_to_ns(ts_diff)); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", perf_test_args.host_page_size, addr, tid); @@ -123,7 +123,8 @@ static void *uffd_handler_thread_fn(void *arg) int pipefd = uffd_args->pipefd; useconds_t delay = uffd_args->delay; int64_t pages = 0; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; clock_gettime(CLOCK_MONOTONIC, &start); while (!quit_uffd_thread) { @@ -192,8 +193,7 @@ static void *uffd_handler_thread_fn(void *arg) pages++; } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", pages, ts_diff.tv_sec, ts_diff.tv_nsec, pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); @@ -257,7 +257,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; int *pipefds = NULL; struct kvm_vm *vm; int vcpu_id; @@ -335,9 +336,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } - pr_info("All vCPU threads joined\n"); + ts_diff = timespec_diff_now(start); - clock_gettime(CLOCK_MONOTONIC, &end); + pr_info("All vCPU threads joined\n"); if (use_uffd) { char c; @@ -351,7 +352,6 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } - ts_diff = timespec_sub(end, start); pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 5eb01bf51b86..1cc036ddb0c5 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -64,5 +64,6 @@ int64_t timespec_to_ns(struct timespec ts); struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); +struct timespec timespec_diff_now(struct timespec start); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 689e97c27ee2..1a46c2c48c7c 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,10 +4,13 @@ * * Copyright (C) 2020, Google LLC. */ -#include + +#include #include #include -#include +#include +#include + #include "test_util.h" /* @@ -81,6 +84,14 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) return timespec_add_ns((struct timespec){0}, ns1 - ns2); } +struct timespec timespec_diff_now(struct timespec start) +{ + struct timespec end; + + clock_gettime(CLOCK_MONOTONIC, &end); + return timespec_sub(end, start); +} + void print_skip(const char *fmt, ...) { va_list ap; -- cgit From 92ab4b9a22cfea9b0d353e86024208040c10e807 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:32 -0700 Subject: KVM: selftests: Add wrfract to common guest code Wrfract will be used by the dirty logging perf test introduced later in this series to dirty memory sparsely. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-5-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 2 ++ tools/testing/selftests/kvm/include/perf_test_util.h | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 7de6feb00076..47defc65aeda 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -266,6 +266,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vm = create_vm(mode, vcpus, vcpu_memory_bytes); + perf_test_args.wr_fract = 1; + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 838f946700f0..1716300469c0 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -46,6 +46,7 @@ struct perf_test_args { struct kvm_vm *vm; uint64_t host_page_size; uint64_t guest_page_size; + int wr_fract; struct vcpu_args vcpu_args[MAX_VCPUS]; }; @@ -72,7 +73,10 @@ static void guest_code(uint32_t vcpu_id) for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * perf_test_args.guest_page_size); - *(uint64_t *)addr = 0x0123456789ABCDEF; + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); } GUEST_SYNC(1); -- cgit From f663132d1e09166db419afb9832d463e0a79f3d5 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:48 +0100 Subject: KVM: selftests: Drop pointless vm_create wrapper Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- tools/testing/selftests/kvm/include/kvm_util.h | 1 - tools/testing/selftests/kvm/include/perf_test_util.h | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 7 +------ 4 files changed, 3 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 139ccb550618..54da9cc20db4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -362,7 +362,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index feb949a92055..7d29aa786959 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -68,7 +68,6 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 1716300469c0..8e053e9dc7ea 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -105,7 +105,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, pages, O_RDWR); + vm = vm_create(mode, pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7669cb7c86e3..126c6727a6b0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -180,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; @@ -271,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) -{ - return _vm_create(mode, phy_pages, perm); -} - /* * VM Restart * -- cgit From 6769155fece2100506e22161945712afae61769f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:52 +0100 Subject: KVM: selftests: Make the per vcpu memory size global Rename vcpu_memory_bytes to something with "percpu" in it in order to be less ambiguous. Also make it global to simplify things. Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-7-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 20 ++++++++------------ tools/testing/selftests/kvm/include/perf_test_util.h | 3 +++ 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 47defc65aeda..33acd954a298 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -27,8 +27,6 @@ #ifdef __NR_userfaultfd -#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ - #ifdef PRINT_PER_PAGE_UPDATES #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) #else @@ -251,8 +249,7 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus, - uint64_t vcpu_memory_bytes) + useconds_t uffd_delay, int vcpus) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; @@ -264,7 +261,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, int vcpu_id; int r; - vm = create_vm(mode, vcpus, vcpu_memory_bytes); + vm = create_vm(mode, vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -276,7 +273,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, vcpus, vcpu_memory_bytes); + add_vcpus(vm, vcpus, guest_percpu_mem_size); if (use_uffd) { uffd_handler_threads = @@ -293,9 +290,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vm_paddr_t vcpu_gpa; void *vcpu_hva; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); /* Cache the HVA pointer of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); @@ -312,7 +309,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], uffd_delay, &uffd_args[vcpu_id], - vcpu_hva, vcpu_memory_bytes); + vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); } @@ -413,7 +410,6 @@ static void help(char *name) int main(int argc, char *argv[]) { bool mode_selected = false; - uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; int vcpus = 1; unsigned int mode; int opt, i; @@ -463,7 +459,7 @@ int main(int argc, char *argv[]) "A negative UFFD delay is not supported."); break; case 'b': - vcpu_memory_bytes = parse_size(optarg); + guest_percpu_mem_size = parse_size(optarg); break; case 'v': vcpus = atoi(optarg); @@ -486,7 +482,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes); + run_test(i, use_uffd, uffd_delay, vcpus); } return 0; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 8e053e9dc7ea..840dc2a19ce1 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -21,6 +21,8 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + /* * Guest physical memory offset of the testing memory slot. * This will be set to the topmost valid physical address minus @@ -33,6 +35,7 @@ static uint64_t guest_test_phys_mem; * Must not conflict with identity mapped test code. */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; struct vcpu_args { uint64_t gva; -- cgit From 3be18630954672b889186e7be9b631f00134e954 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:53 +0100 Subject: KVM: selftests: Make the number of vcpus global We also check the input number of vcpus against the maximum supported. Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-8-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 37 ++++++++++------------ .../testing/selftests/kvm/include/perf_test_util.h | 3 ++ 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 33acd954a298..3d96a7bfaff3 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -249,7 +249,7 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus) + useconds_t uffd_delay) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; @@ -261,7 +261,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, int vcpu_id; int r; - vm = create_vm(mode, vcpus, guest_percpu_mem_size); + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -270,23 +270,23 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, "Failed to allocate buffer for guest data pattern"); memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); - vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, vcpus, guest_percpu_mem_size); + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); if (use_uffd) { uffd_handler_threads = - malloc(vcpus * sizeof(*uffd_handler_threads)); + malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - uffd_args = malloc(vcpus * sizeof(*uffd_args)); + uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); TEST_ASSERT(uffd_args, "Memory allocation failed"); - pipefds = malloc(sizeof(int) * vcpus * 2); + pipefds = malloc(sizeof(int) * nr_vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vm_paddr_t vcpu_gpa; void *vcpu_hva; @@ -322,7 +322,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, &perf_test_args.vcpu_args[vcpu_id]); } @@ -330,7 +330,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Started all vCPUs\n"); /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_join(vcpu_threads[vcpu_id], NULL); PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } @@ -343,7 +343,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, char c; /* Tell the user fault fd handler threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { r = write(pipefds[vcpu_id * 2 + 1], &c, 1); TEST_ASSERT(r == 1, "Unable to write to pipefd"); @@ -354,7 +354,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - perf_test_args.vcpu_args[0].pages * vcpus / + perf_test_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); @@ -409,8 +409,8 @@ static void help(char *name) int main(int argc, char *argv[]) { + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); bool mode_selected = false; - int vcpus = 1; unsigned int mode; int opt, i; bool use_uffd = false; @@ -462,12 +462,9 @@ int main(int argc, char *argv[]) guest_percpu_mem_size = parse_size(optarg); break; case 'v': - vcpus = atoi(optarg); - TEST_ASSERT(vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -482,7 +479,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus); + run_test(i, use_uffd, uffd_delay); } return 0; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 840dc2a19ce1..05eeb5d2bfc4 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -37,6 +37,9 @@ static uint64_t guest_test_phys_mem; static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +/* Number of VCPUs for the test */ +static int nr_vcpus = 1; + struct vcpu_args { uint64_t gva; uint64_t pages; -- cgit From 4fd94ec7d566ee2f0b52111cc6d26dd311f8a7c3 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:33 -0700 Subject: KVM: selftests: Introduce the dirty log perf test The dirty log perf test will time verious dirty logging operations (enabling dirty logging, dirtying memory, getting the dirty log, clearing the dirty log, and disabling dirty logging) in order to quantify dirty logging performance. This test can be used to inform future performance improvements to KVM's dirty logging infrastructure. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-6-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/dirty_log_perf_test.c | 376 +++++++++++++++++++++ .../testing/selftests/kvm/include/perf_test_util.h | 18 +- tools/testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 7 + 6 files changed, 396 insertions(+), 8 deletions(-) create mode 100644 tools/testing/selftests/kvm/dirty_log_perf_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index ea1692d43411..7a2c242b7152 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -27,6 +27,7 @@ /clear_dirty_log_test /demand_paging_test /dirty_log_test +/dirty_log_perf_test /kvm_create_max_vcpus /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c8af04dccf7f..3d14ef77755e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -61,6 +61,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c new file mode 100644 index 000000000000..cda7b000b1bc --- /dev/null +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging performance test + * + * Based on dirty_log_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2020, Google, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" +#include "test_util.h" + +/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ +#define TEST_HOST_LOOP_N 2UL + +/* Host variables */ +static bool host_quit; +static uint64_t iteration; +static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; + +static void *vcpu_worker(void *data) +{ + int ret; + struct kvm_vm *vm = perf_test_args.vm; + uint64_t pages_count = 0; + struct kvm_run *run; + struct timespec start; + struct timespec ts_diff; + struct timespec total = (struct timespec){0}; + struct timespec avg; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + while (!READ_ONCE(host_quit)) { + uint64_t current_iteration = READ_ONCE(iteration); + + clock_gettime(CLOCK_MONOTONIC, &start); + ret = _vcpu_run(vm, vcpu_id); + ts_diff = timespec_diff_now(start); + + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + + pr_debug("Got sync event from vCPU %d\n", vcpu_id); + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + pr_debug("vCPU %d updated last completed iteration to %lu\n", + vcpu_id, vcpu_last_completed_iteration[vcpu_id]); + + if (current_iteration) { + pages_count += vcpu_args->pages; + total = timespec_add(total, ts_diff); + pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } else { + pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } + + while (current_iteration == READ_ONCE(iteration) && + !READ_ONCE(host_quit)) {} + } + + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); + pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], + total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); + + return NULL; +} + +#ifdef USE_CLEAR_DIRTY_LOG +static u64 dirty_log_manual_caps; +#endif + +static void run_test(enum vm_guest_mode mode, unsigned long iterations, + uint64_t phys_offset, int wr_fract) +{ + pthread_t *vcpu_threads; + struct kvm_vm *vm; + unsigned long *bmap; + uint64_t guest_num_pages; + uint64_t host_num_pages; + int vcpu_id; + struct timespec start; + struct timespec ts_diff; + struct timespec get_dirty_log_total = (struct timespec){0}; + struct timespec vcpu_dirty_total = (struct timespec){0}; + struct timespec avg; +#ifdef USE_CLEAR_DIRTY_LOG + struct kvm_enable_cap cap = {}; + struct timespec clear_dirty_log_total = (struct timespec){0}; +#endif + + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + + perf_test_args.wr_fract = wr_fract; + + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); + bmap = bitmap_alloc(host_num_pages); + +#ifdef USE_CLEAR_DIRTY_LOG + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = dirty_log_manual_caps; + vm_enable_cap(vm, &cap); +#endif + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + + sync_global_to_guest(vm, perf_test_args); + + /* Start the iterations */ + iteration = 0; + host_quit = false; + + clock_gettime(CLOCK_MONOTONIC, &start); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + } + + /* Allow the vCPU to populate memory */ + pr_debug("Starting iteration %lu - Populating\n", iteration); + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n", + iteration); + + ts_diff = timespec_diff_now(start); + pr_info("Populate memory time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Enable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + KVM_MEM_LOG_DIRTY_PAGES); + ts_diff = timespec_diff_now(start); + pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + while (iteration < iterations) { + /* + * Incrementing the iteration number will start the vCPUs + * dirtying memory again. + */ + clock_gettime(CLOCK_MONOTONIC, &start); + iteration++; + + pr_debug("Starting iteration %lu\n", iteration); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n", + vcpu_id, iteration); + } + + ts_diff = timespec_diff_now(start); + vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff); + pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + + ts_diff = timespec_diff_now(start); + get_dirty_log_total = timespec_add(get_dirty_log_total, + ts_diff); + pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + host_num_pages); + + ts_diff = timespec_diff_now(start); + clear_dirty_log_total = timespec_add(clear_dirty_log_total, + ts_diff); + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); +#endif + } + + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + /* Disable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + ts_diff = timespec_diff_now(start); + pr_info("Disabling dirty logging time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + avg = timespec_div(get_dirty_log_total, iterations); + pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, get_dirty_log_total.tv_sec, + get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + avg = timespec_div(clear_dirty_log_total, iterations); + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, clear_dirty_log_total.tv_sec, + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); +#endif + + free(bmap); + free(vcpu_threads); + ucall_uninit(vm); + kvm_vm_free(vm); +} + +struct guest_mode { + bool supported; + bool enabled; +}; +static struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_init(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-i iterations] [-p offset] " + "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -m: specify the guest mode ID to test " + "(default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -f: specify the fraction of pages which should be written to\n" + " as opposed to simply read, in the form\n" + " 1/.\n" + " (default: 1 i.e. all pages are written to.)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + unsigned long iterations = TEST_HOST_LOOP_N; + bool mode_selected = false; + uint64_t phys_offset = 0; + unsigned int mode; + int opt, i; + int wr_fract = 1; + +#ifdef USE_CLEAR_DIRTY_LOG + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + if (!dirty_log_manual_caps) { + print_skip("KVM_CLEAR_DIRTY_LOG not available"); + exit(KSFT_SKIP); + } + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); +#endif + +#ifdef __x86_64__ + guest_mode_init(VM_MODE_PXXV48_4K, true, true); +#endif +#ifdef __aarch64__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_64K, true, true); + + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + if (limit >= 52) + guest_mode_init(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_init(VM_MODE_P48V48_4K, true, true); + guest_mode_init(VM_MODE_P48V48_64K, true, true); + } + } +#endif +#ifdef __s390x__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); +#endif + + while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'p': + phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'f': + wr_fract = atoi(optarg); + TEST_ASSERT(wr_fract >= 1, + "Write fraction cannot be less than one"); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0, + "Must have a positive number of vCPUs"); + TEST_ASSERT(nr_vcpus <= MAX_VCPUS, + "This test does not currently support\n" + "more than %d vCPUs.", MAX_VCPUS); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + + pr_info("Test iterations: %"PRIu64"\n", iterations); + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + run_test(i, iterations, phys_offset, wr_fract); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 05eeb5d2bfc4..2618052057b1 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -76,16 +76,18 @@ static void guest_code(uint32_t vcpu_id) gva = vcpu_args->gva; pages = vcpu_args->pages; - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); - if (i % perf_test_args.wr_fract == 0) - *(uint64_t *)addr = 0x0123456789ABCDEF; - else - READ_ONCE(*(uint64_t *)addr); - } + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } - GUEST_SYNC(1); + GUEST_SYNC(1); + } } static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 1cc036ddb0c5..ffffa560436b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -65,5 +65,6 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_diff_now(struct timespec start); +struct timespec timespec_div(struct timespec ts, int divisor); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 1a46c2c48c7c..8e04c0b1608e 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -92,6 +92,13 @@ struct timespec timespec_diff_now(struct timespec start) return timespec_sub(end, start); } +struct timespec timespec_div(struct timespec ts, int divisor) +{ + int64_t ns = timespec_to_ns(ts) / divisor; + + return timespec_add_ns((struct timespec){0}, ns); +} + void print_skip(const char *fmt, ...) { va_list ap; -- cgit From 6d6a18fdde8b86b919b740ad629153de432d12a8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Nov 2020 09:45:17 -0500 Subject: KVM: selftests: allow two iterations of dirty_log_perf_test Even though one iteration is not enough for the dirty log performance test (due to the cost of building page tables, zeroing memory etc.) two is okay and it is the default. Without this patch, "./dirty_log_perf_test" without any further arguments fails. Cc: Ben Gardon Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index cda7b000b1bc..85c9b8f73142 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -359,7 +359,7 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); pr_info("Test iterations: %"PRIu64"\n", iterations); -- cgit From 197afc631413d96dc60acfc7970bdd4125d38cd3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 6 Nov 2020 16:02:51 -0800 Subject: libbpf: Don't attempt to load unused subprog as an entry-point BPF program If BPF code contains unused BPF subprogram and there are no other subprogram calls (which can realistically happen in real-world applications given sufficiently smart Clang code optimizations), libbpf will erroneously assume that subprograms are entry-point programs and will attempt to load them with UNSPEC program type. Fix by not relying on subcall instructions and rather detect it based on the structure of BPF object's sections. Fixes: 9a94f277c4fb ("tools: libbpf: restore the ability to load programs from .text section") Reported-by: Dmitrii Banshchikov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201107000251.256821-1-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 23 ++++++++++++---------- tools/testing/selftests/bpf/prog_tests/subprogs.c | 6 ++++++ .../selftests/bpf/progs/test_subprogs_unused.c | 21 ++++++++++++++++++++ 3 files changed, 40 insertions(+), 10 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_subprogs_unused.c (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 313034117070..28baee7ba1ca 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -560,8 +560,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, const char *name, size_t sec_idx, const char *sec_name, size_t sec_off, void *insn_data, size_t insn_data_sz) { - int i; - if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", sec_name, name, sec_off, insn_data_sz); @@ -600,13 +598,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, goto errout; memcpy(prog->insns, insn_data, insn_data_sz); - for (i = 0; i < prog->insns_cnt; i++) { - if (insn_is_subprog_call(&prog->insns[i])) { - obj->has_subcalls = true; - break; - } - } - return 0; errout: pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); @@ -3280,7 +3271,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) { - return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls; + /* For legacy reasons, libbpf supports an entry-point BPF programs + * without SEC() attribute, i.e., those in the .text section. But if + * there are 2 or more such programs in the .text section, they all + * must be subprograms called from entry-point BPF programs in + * designated SEC()'tions, otherwise there is no way to distinguish + * which of those programs should be loaded vs which are a subprogram. + * Similarly, if there is a function/program in .text and at least one + * other BPF program with custom SEC() attribute, then we just assume + * .text programs are subprograms (even if they are not called from + * other programs), because libbpf never explicitly supported mixing + * SEC()-designated BPF programs and .text entry-point BPF programs. + */ + return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; } struct bpf_program * diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c index a00abf58c037..3f3d2ac4dd57 100644 --- a/tools/testing/selftests/bpf/prog_tests/subprogs.c +++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c @@ -3,12 +3,14 @@ #include #include #include "test_subprogs.skel.h" +#include "test_subprogs_unused.skel.h" static int duration; void test_subprogs(void) { struct test_subprogs *skel; + struct test_subprogs_unused *skel2; int err; skel = test_subprogs__open_and_load(); @@ -26,6 +28,10 @@ void test_subprogs(void) CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19); CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36); + skel2 = test_subprogs_unused__open_and_load(); + ASSERT_OK_PTR(skel2, "unused_progs_skel"); + test_subprogs_unused__destroy(skel2); + cleanup: test_subprogs__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c new file mode 100644 index 000000000000..75d975f8cf90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c @@ -0,0 +1,21 @@ +#include "vmlinux.h" +#include +#include + +const char LICENSE[] SEC("license") = "GPL"; + +__attribute__((maybe_unused)) __noinline int unused1(int x) +{ + return x + 1; +} + +static __attribute__((maybe_unused)) __noinline int unused2(int x) +{ + return x + 2; +} + +SEC("raw_tp/sys_enter") +int main_prog(void *ctx) +{ + return 0; +} -- cgit From 3e9fa9983b9297407c2448114d6d27782d5e2ef2 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 30 Sep 2020 21:04:05 -0400 Subject: tools/power turbostat: update version number goodbye summer... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 74d2fc6fc78a..f3a1746f7f45 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5712,7 +5712,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 20.03.20" + fprintf(outf, "turbostat version 20.09.30" " - Len Brown \n"); } -- cgit From c335b4f1f65012713832d988ec06512c7bda5c04 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Thu, 5 Nov 2020 15:24:40 -0800 Subject: kunit: tool: unmark test_data as binary blobs The tools/testing/kunit/test_data/ directory was marked as binary because some of the test_data files cause checkpatch warnings. Fix this by dropping the .gitattributes file. Fixes: afc63da64f1e ("kunit: kunit_parser: make parser more robust") Signed-off-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/.gitattributes | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tools/testing/kunit/.gitattributes (limited to 'tools') diff --git a/tools/testing/kunit/.gitattributes b/tools/testing/kunit/.gitattributes deleted file mode 100644 index 5b7da1fc3b8f..000000000000 --- a/tools/testing/kunit/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -test_data/* binary -- cgit From 3959d0a63b3202ea2aa12b3f6effd5400d773d31 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Oct 2020 00:16:03 -0700 Subject: kunit: Fix kunit.py parse subcommand (use null build_dir) When JSON support was added in [1], the KunitParseRequest tuple was updated to contain a 'build_dir' field, but kunit.py parse doesn't accept --build_dir as an option. The code nevertheless tried to access it, resulting in this error: AttributeError: 'Namespace' object has no attribute 'build_dir' Given that the parser only uses the build_dir variable to set the 'build_environment' json field, we set it to None (which gives the JSON 'null') for now. Ultimately, we probably do want to be able to set this, but since it's new functionality which (for the parse subcommand) never worked, this is the quickest way of getting it back up and running. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/?h=kunit-fixes&id=21a6d1780d5bbfca0ce9b8104ca6233502fcbf86 Fixes: 21a6d1780d5b ("kunit: tool: allow generating test results in JSON") Signed-off-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index ebf5f5763dee..a6d5f219f714 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -337,7 +337,7 @@ def main(argv, linux=None): kunit_output = f.read().splitlines() request = KunitParseRequest(cli_args.raw_output, kunit_output, - cli_args.build_dir, + None, cli_args.json) result = parse_tests(request) if result.status != KunitStatus.SUCCESS: -- cgit From b7e0b983ff13714d261883e89910b0755eb12169 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 21 Oct 2020 15:07:52 -0700 Subject: kunit: tool: fix pre-existing python type annotation errors The code uses annotations, but they aren't accurate. Note that type checking in python is a separate process, running `kunit.py run` will not check and complain about invalid types at runtime. Fix pre-existing issues found by running a type checker $ mypy *.py All but one of these were returning `None` without denoting this properly (via `Optional[Type]`). Signed-off-by: Daniel Latypov Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 84a1af2581f5..db0347baa428 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,7 +12,7 @@ from collections import namedtuple from datetime import datetime from enum import Enum, auto from functools import reduce -from typing import List +from typing import List, Optional, Tuple TestResult = namedtuple('TestResult', ['status','suites','log']) @@ -151,7 +151,7 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: else: return False -def parse_test_case(lines: List[str]) -> TestCase: +def parse_test_case(lines: List[str]) -> Optional[TestCase]: test_case = TestCase() save_non_diagnositic(lines, test_case) while parse_diagnostic(lines, test_case): @@ -163,7 +163,7 @@ def parse_test_case(lines: List[str]) -> TestCase: SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') -def parse_subtest_header(lines: List[str]) -> str: +def parse_subtest_header(lines: List[str]) -> Optional[str]: consume_non_diagnositic(lines) if not lines: return None @@ -176,7 +176,7 @@ def parse_subtest_header(lines: List[str]) -> str: SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') -def parse_subtest_plan(lines: List[str]) -> int: +def parse_subtest_plan(lines: List[str]) -> Optional[int]: consume_non_diagnositic(lines) match = SUBTEST_PLAN.match(lines[0]) if match: @@ -230,7 +230,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) return max_status(max_test_case_status, test_suite.status) -def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: +def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]: if not lines: return None consume_non_diagnositic(lines) @@ -271,7 +271,7 @@ def parse_tap_header(lines: List[str]) -> bool: TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') -def parse_test_plan(lines: List[str]) -> int: +def parse_test_plan(lines: List[str]) -> Optional[int]: consume_non_diagnositic(lines) match = TEST_PLAN.match(lines[0]) if match: @@ -310,7 +310,7 @@ def parse_test_result(lines: List[str]) -> TestResult: else: return TestResult(TestStatus.NO_TESTS, [], lines) -def print_and_count_results(test_result: TestResult) -> None: +def print_and_count_results(test_result: TestResult) -> Tuple[int, int, int]: total_tests = 0 failed_tests = 0 crashed_tests = 0 -- cgit From fcdb0bc08ced274078f371e1e0fe6421a97fa9f2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 26 Oct 2020 18:59:25 +0200 Subject: kunit: Do not pollute source directory with generated files (.kunitconfig) When --build_dir is provided use it and do not pollute source directory which even can be mounted over network or read-only. Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 25 ++++++++++++------------- tools/testing/kunit/kunit_kernel.py | 24 +++++++++++++++++++----- 2 files changed, 31 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index a6d5f219f714..d4f7846d0745 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -11,7 +11,6 @@ import argparse import sys import os import time -import shutil from collections import namedtuple from enum import Enum, auto @@ -44,11 +43,6 @@ class KunitStatus(Enum): BUILD_FAILURE = auto() TEST_FAILURE = auto() -def create_default_kunitconfig(): - if not os.path.exists(kunit_kernel.kunitconfig_path): - shutil.copyfile('arch/um/configs/kunit_defconfig', - kunit_kernel.kunitconfig_path) - def get_kernel_root_path(): parts = sys.argv[0] if not __file__ else __file__ parts = os.path.realpath(parts).split('tools/testing/kunit') @@ -61,7 +55,6 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree, kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...') config_start = time.time() - create_default_kunitconfig() success = linux.build_reconfig(request.build_dir, request.make_options) config_end = time.time() if not success: @@ -262,12 +255,12 @@ def main(argv, linux=None): if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) - if not os.path.exists(kunit_kernel.kunitconfig_path): - create_default_kunitconfig() - if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitRequest(cli_args.raw_output, cli_args.timeout, cli_args.jobs, @@ -283,12 +276,12 @@ def main(argv, linux=None): not os.path.exists(cli_args.build_dir)): os.mkdir(cli_args.build_dir) - if not os.path.exists(kunit_kernel.kunitconfig_path): - create_default_kunitconfig() - if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitConfigRequest(cli_args.build_dir, cli_args.make_options) result = config_tests(linux, request) @@ -301,6 +294,9 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitBuildRequest(cli_args.jobs, cli_args.build_dir, cli_args.alltests, @@ -315,6 +311,9 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + exec_request = KunitExecRequest(cli_args.timeout, cli_args.build_dir, cli_args.alltests) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index b557b1e93f98..633a2efcfdbd 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,10 +6,10 @@ # Author: Felix Guo # Author: Brendan Higgins - import logging import subprocess import os +import shutil import signal from contextlib import ExitStack @@ -18,7 +18,8 @@ import kunit_config import kunit_parser KCONFIG_PATH = '.config' -kunitconfig_path = '.kunitconfig' +KUNITCONFIG_PATH = '.kunitconfig' +DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' class ConfigError(Exception): @@ -99,19 +100,22 @@ class LinuxSourceTreeOperations(object): stderr=subprocess.STDOUT) process.wait(timeout) - def get_kconfig_path(build_dir): kconfig_path = KCONFIG_PATH if build_dir: kconfig_path = os.path.join(build_dir, KCONFIG_PATH) return kconfig_path +def get_kunitconfig_path(build_dir): + kunitconfig_path = KUNITCONFIG_PATH + if build_dir: + kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH) + return kunitconfig_path + class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" def __init__(self): - self._kconfig = kunit_config.Kconfig() - self._kconfig.read_from_file(kunitconfig_path) self._ops = LinuxSourceTreeOperations() signal.signal(signal.SIGINT, self.signal_handler) @@ -123,6 +127,16 @@ class LinuxSourceTree(object): return False return True + def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH): + kunitconfig_path = get_kunitconfig_path(build_dir) + if not os.path.exists(kunitconfig_path): + shutil.copyfile(defconfig, kunitconfig_path) + + def read_kunitconfig(self, build_dir): + kunitconfig_path = get_kunitconfig_path(build_dir) + self._kconfig = kunit_config.Kconfig() + self._kconfig.read_from_file(kunitconfig_path) + def validate_config(self, build_dir): kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.Kconfig() -- cgit From 128dc4bcc8c0c7c3bab4a3818a1ec608cccb017a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 26 Oct 2020 18:59:26 +0200 Subject: kunit: Do not pollute source directory with generated files (test.log) When --build_dir is provided use it and do not pollute source directory which even can be mounted over network or read-only. Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 633a2efcfdbd..b4768fa03ce0 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -21,6 +21,7 @@ KCONFIG_PATH = '.config' KUNITCONFIG_PATH = '.kunitconfig' DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' +OUTFILE_PATH = 'test.log' class ConfigError(Exception): """Represents an error trying to configure the Linux kernel.""" @@ -89,11 +90,12 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise BuildError(e.output.decode()) - def linux_bin(self, params, timeout, build_dir, outfile): + def linux_bin(self, params, timeout, build_dir): """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = './linux' if build_dir: linux_bin = os.path.join(build_dir, 'linux') + outfile = get_outfile_path(build_dir) with open(outfile, 'w') as output: process = subprocess.Popen([linux_bin] + params, stdout=output, @@ -112,6 +114,12 @@ def get_kunitconfig_path(build_dir): kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH) return kunitconfig_path +def get_outfile_path(build_dir): + outfile_path = OUTFILE_PATH + if build_dir: + outfile_path = os.path.join(build_dir, OUTFILE_PATH) + return outfile_path + class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" @@ -192,8 +200,8 @@ class LinuxSourceTree(object): def run_kernel(self, args=[], build_dir='', timeout=None): args.extend(['mem=1G']) - outfile = 'test.log' - self._ops.linux_bin(args, timeout, build_dir, outfile) + self._ops.linux_bin(args, timeout, build_dir) + outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) with open(outfile, 'r') as file: for line in file: -- cgit From 390881448b1ff1e9d82896abbbda7cdb8e0be27c Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 29 Oct 2020 15:09:29 -0700 Subject: kunit: tool: print out stderr from make (like build warnings) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the tool redirects make stdout + stderr, and only shows them if the make command fails. This means build warnings aren't shown to the user. This change prints the contents of stderr even if make succeeds, under the assumption these are only build warnings or other messages the user likely wants to see. We drop stdout from the raised exception since we can no longer easily collate stdout and stderr and just showing the stderr seems fine. Example with a warning: [14:56:35] Building KUnit Kernel ... ../lib/kunit/kunit-test.c: In function ‘kunit_test_successful_try’: ../lib/kunit/kunit-test.c:19:6: warning: unused variable ‘unused’ [-Wunused-variable] 19 | int unused; | ^~~~~~ [14:56:40] Starting KUnit Kernel ... Note the stderr has a trailing \n, and since we use print, we add another, but it helps separate make and kunit.py output. Example with a build error: [15:02:45] Building KUnit Kernel ... ERROR:root:../lib/kunit/kunit-test.c: In function ‘kunit_test_successful_try’: ../lib/kunit/kunit-test.c:19:2: error: unknown type name ‘invalid_type’ 19 | invalid_type *test = data; | ^~~~~~~~~~~~ ... Signed-off-by: Daniel Latypov Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index b4768fa03ce0..2e3cc0fac726 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -84,11 +84,16 @@ class LinuxSourceTreeOperations(object): if build_dir: command += ['O=' + build_dir] try: - subprocess.check_output(command, stderr=subprocess.STDOUT) + proc = subprocess.Popen(command, + stderr=subprocess.PIPE, + stdout=subprocess.DEVNULL) except OSError as e: - raise BuildError('Could not call execute make: ' + str(e)) - except subprocess.CalledProcessError as e: - raise BuildError(e.output.decode()) + raise BuildError('Could not call make command: ' + str(e)) + _, stderr = proc.communicate() + if proc.returncode != 0: + raise BuildError(stderr.decode()) + if stderr: # likely only due to build warnings + print(stderr.decode()) def linux_bin(self, params, timeout, build_dir): """Runs the Linux UML binary. Must be named 'linux'.""" -- cgit From 060352e141e4c71ce147a2737f6d30a97f2ec317 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Fri, 30 Oct 2020 15:38:53 -0700 Subject: kunit: tool: fix extra trailing \n in raw + parsed test output For simplcity, strip all trailing whitespace from parsed output. I imagine no one is printing out meaningful trailing whitespace via KUNIT_FAIL() or similar, and that if they are, they really shouldn't. `isolate_kunit_output()` yielded liens with trailing \n, which results in artifacty output like this: $ ./tools/testing/kunit/kunit.py run [16:16:46] [FAILED] example_simple_test [16:16:46] # example_simple_test: EXPECTATION FAILED at lib/kunit/kunit-example-test.c:29 [16:16:46] Expected 1 + 1 == 3, but [16:16:46] 1 + 1 == 2 [16:16:46] 3 == 3 [16:16:46] not ok 1 - example_simple_test [16:16:46] After this change: [16:16:46] # example_simple_test: EXPECTATION FAILED at lib/kunit/kunit-example-test.c:29 [16:16:46] Expected 1 + 1 == 3, but [16:16:46] 1 + 1 == 2 [16:16:46] 3 == 3 [16:16:46] not ok 1 - example_simple_test [16:16:46] We should *not* be expecting lines to end with \n in kunit_tool_test.py for this reason. Do the same for `raw_output()` as well which suffers from the same issue. This is a followup to [1], but rebased onto kunit-fixes to pick up the other raw_output() fix and fixes for kunit_tool_test.py. [1] https://lore.kernel.org/linux-kselftest/20201020233219.4146059-1-dlatypov@google.com/ Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 3 ++- tools/testing/kunit/kunit_tool_test.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index db0347baa428..bbfe1b4e4c1c 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -54,6 +54,7 @@ kunit_end_re = re.compile('(List of all partitions:|' def isolate_kunit_output(kernel_output): started = False for line in kernel_output: + line = line.rstrip() # line always has a trailing \n if kunit_start_re.search(line): prefix_len = len(line.split('TAP version')[0]) started = True @@ -65,7 +66,7 @@ def isolate_kunit_output(kernel_output): def raw_output(kernel_output): for line in kernel_output: - print(line) + print(line.rstrip()) DIVIDER = '=' * 60 diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 0b60855fb819..497ab51bc170 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -102,7 +102,7 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_output_isolated_correctly.log') file = open(log_path) result = kunit_parser.isolate_kunit_output(file.readlines()) - self.assertContains('TAP version 14\n', result) + self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: example', result) self.assertContains(' 1..2', result) self.assertContains(' ok 1 - example_simple_test', result) @@ -115,7 +115,7 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_pound_sign.log') with open(log_path) as file: result = kunit_parser.isolate_kunit_output(file.readlines()) - self.assertContains('TAP version 14\n', result) + self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: kunit-resource-test', result) self.assertContains(' 1..5', result) self.assertContains(' ok 1 - kunit_resource_test_init_resources', result) -- cgit From 97adb13dc9ba08ecd4758bc59efc0205f5cbf377 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 7 Nov 2020 13:19:28 +0200 Subject: selftest: fix flower terse dump tests Iproute2 tc classifier terse dump has been accepted with modified syntax. Update the tests accordingly. Signed-off-by: Vlad Buslov Fixes: e7534fd42a99 ("selftests: implement flower classifier terse dump tests") Link: https://lore.kernel.org/r/20201107111928.453534-1-vlad@buslov.dev Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/filters/tests.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index bb543bf69d69..361235ad574b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -100,7 +100,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower.*handle", "matchCount": "1", "teardown": [ @@ -119,7 +119,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": " dst_mac e4:11:22:11:4a:51", "matchCount": "0", "teardown": [ -- cgit From fd63729cc0a6872bdabd393ee933a969642e4076 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 11 Nov 2020 15:12:15 -0800 Subject: selftests/bpf: Fix unused attribute usage in subprogs_unused test Correct attribute name is "unused". maybe_unused is a C++17 addition. This patch fixes compilation warning during selftests compilation. Fixes: 197afc631413 ("libbpf: Don't attempt to load unused subprog as an entry-point BPF program") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201111231215.1779147-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/test_subprogs_unused.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c index 75d975f8cf90..bc49e050d342 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c @@ -4,12 +4,12 @@ const char LICENSE[] SEC("license") = "GPL"; -__attribute__((maybe_unused)) __noinline int unused1(int x) +__attribute__((unused)) __noinline int unused1(int x) { return x + 1; } -static __attribute__((maybe_unused)) __noinline int unused2(int x) +static __attribute__((unused)) __noinline int unused2(int x) { return x + 2; } -- cgit From e24a87b54ef3e39261f1d859b7f78416349dfb14 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 4 Nov 2020 17:42:28 +0800 Subject: perf lock: Correct field name "flags" The tracepoint "lock:lock_acquire" contains field "flags" but not "flag". Current code wrongly retrieves value from field "flag" and it always gets zero for the value, thus "perf lock" doesn't report the correct result. This patch replaces the field name "flag" with "flags", so can read out the correct flags for locking. Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence") Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201104094229.17509-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index f0a1dbacb46c..5cecc1ad78e1 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -406,7 +406,7 @@ static int report_lock_acquire_event(struct evsel *evsel, struct lock_seq_stat *seq; const char *name = evsel__strval(evsel, sample, "name"); u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); - int flag = evsel__intval(evsel, sample, "flag"); + int flag = evsel__intval(evsel, sample, "flags"); memcpy(&addr, &tmp, sizeof(void *)); -- cgit From b0e5a05cc9e37763c7f19366d94b1a6160c755bc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 4 Nov 2020 17:42:29 +0800 Subject: perf lock: Don't free "lock_seq_stat" if read_count isn't zero When execute command "perf lock report", it hits failure and outputs log as follows: perf: builtin-lock.c:623: report_lock_release_event: Assertion `!(seq->read_count < 0)' failed. Aborted This is an imbalance issue. The locking sequence structure "lock_seq_stat" contains the reader counter and it is used to check if the locking sequence is balance or not between acquiring and releasing. If the tool wrongly frees "lock_seq_stat" when "read_count" isn't zero, the "read_count" will be reset to zero when allocate a new structure at the next time; thus it causes the wrong counting for reader and finally results in imbalance issue. To fix this issue, if detects "read_count" is not zero (means still have read user in the locking sequence), goto the "end" tag to skip freeing structure "lock_seq_stat". Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence") Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201104094229.17509-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 5cecc1ad78e1..a2f1e53f37a7 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -621,7 +621,7 @@ static int report_lock_release_event(struct evsel *evsel, case SEQ_STATE_READ_ACQUIRED: seq->read_count--; BUG_ON(seq->read_count < 0); - if (!seq->read_count) { + if (seq->read_count) { ls->nr_release++; goto end; } -- cgit From db1a8b97a0a36155171dbb805fbcb276e07559f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Nov 2020 13:59:15 -0300 Subject: tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' To bring in the change made in this cset: 4d6ffa27b8e5116c ("x86/lib: Change .weak to SYM_FUNC_START_WEAK for arch/x86/lib/mem*_64.S") 6dcc5627f6aec4cb ("x86/asm: Change all ENTRY+ENDPROC to SYM_FUNC_*") I needed to define SYM_FUNC_START_LOCAL() as SYM_L_GLOBAL as mem{cpy,set}_{orig,erms} are used by 'perf bench'. This silences these perf tools build warnings: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fangrui Song Cc: Ian Rogers Cc: Jiri Olsa Cc: Jiri Slaby Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 8 +++----- tools/arch/x86/lib/memset_64.S | 11 ++++++----- tools/perf/bench/mem-memcpy-x86-64-asm.S | 3 +++ tools/perf/bench/mem-memset-x86-64-asm.S | 3 +++ tools/perf/util/include/linux/linkage.h | 7 +++++++ 5 files changed, 22 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 0b5b8ae56bd9..1e299ac73c86 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -16,8 +16,6 @@ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. */ -.weak memcpy - /* * memcpy - Copy a memory block. * @@ -30,7 +28,7 @@ * rax original destination */ SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_LOCAL(memcpy) +SYM_FUNC_START_WEAK(memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -51,14 +49,14 @@ EXPORT_SYMBOL(__memcpy) * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. */ -SYM_FUNC_START(memcpy_erms) +SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb ret SYM_FUNC_END(memcpy_erms) -SYM_FUNC_START(memcpy_orig) +SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax cmpq $0x20, %rdx diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index fd5d25a474b7..0bfd26e4ca9e 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -4,8 +4,7 @@ #include #include #include - -.weak memset +#include /* * ISO C memset - set a memory block to a byte value. This function uses fast @@ -18,7 +17,7 @@ * * rax original destination */ -SYM_FUNC_START_ALIAS(memset) +SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -44,6 +43,8 @@ SYM_FUNC_START(__memset) ret SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) +EXPORT_SYMBOL(memset) +EXPORT_SYMBOL(__memset) /* * ISO C memset - set a memory block to a byte value. This function uses @@ -56,7 +57,7 @@ SYM_FUNC_END_ALIAS(memset) * * rax original destination */ -SYM_FUNC_START(memset_erms) +SYM_FUNC_START_LOCAL(memset_erms) movq %rdi,%r9 movb %sil,%al movq %rdx,%rcx @@ -65,7 +66,7 @@ SYM_FUNC_START(memset_erms) ret SYM_FUNC_END(memset_erms) -SYM_FUNC_START(memset_orig) +SYM_FUNC_START_LOCAL(memset_orig) movq %rdi,%r10 /* expand byte value */ diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 9ad015a1e202..6eb45a2aa8db 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -2,6 +2,9 @@ /* Various wrappers to make the kernel .S file build in user-space: */ +// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index d550bd526162..6f093c483842 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index b8a5159361b4..5acf053fca7d 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -25,6 +25,7 @@ /* SYM_L_* -- linkage of symbols */ #define SYM_L_GLOBAL(name) .globl name +#define SYM_L_WEAK(name) .weak name #define SYM_L_LOCAL(name) /* nothing */ #define ALIGN __ALIGN @@ -84,6 +85,12 @@ SYM_END(name, SYM_T_FUNC) #endif +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#ifndef SYM_FUNC_START_WEAK +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) +#endif + /* * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, * SYM_FUNC_START_WEAK, ... -- cgit From db2ac2e49e564c2b219c4b33d9903aa383334256 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 10 Nov 2020 14:34:16 +0800 Subject: perf test: Fix a typo in cs-etm testing Fix a typo: s/devce_name/device_name. Fixes: fe0aed19b266 ("perf test: Introduce script for Arm CoreSight testing") Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight ml Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20201110063417.14467-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 8d84fdbed6a6..59d847d4981d 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -105,7 +105,7 @@ arm_cs_iterate_devices() { # `> device_name = 'tmc_etf0' device_name=$(basename $path) - if is_device_sink $path $devce_name; then + if is_device_sink $path $device_name; then record_touch_file $device_name $2 && perf_script_branch_samples touch && -- cgit From dd94ac807a5e10e0b25b68397c473276905cca73 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 10 Nov 2020 14:34:17 +0800 Subject: perf test: Update branch sample pattern for cs-etm Since the commit 943b69ac1884 ("perf parse-events: Set exclude_guest=1 for user-space counting"), 'exclude_guest=1' is set for user-space counting; and the branch sample's modifier has been altered, the sample event name has been changed from "branches:u:" to "branches:uH:", which gives out info for "user-space and host counting". But the cs-etm testing's regular expression cannot match the updated branch sample event and leads to test failure. This patch updates the branch sample pattern by using a more flexible expression '.*' to match branch sample's modifiers, so that allows the testing to work as expected. Fixes: 943b69ac1884 ("perf parse-events: Set exclude_guest=1 for user-space counting") Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight ml Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20201110063417.14467-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 59d847d4981d..18fde2f179cd 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -44,7 +44,7 @@ perf_script_branch_samples() { # touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so) # touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so) perf script -F,-time -i ${perfdata} | \ - egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +" + egrep " +$1 +[0-9]+ .* +branches:(.*:)? +" } perf_report_branch_samples() { -- cgit From 50431b45685b600fc2851a3f2b53e24643efe6d3 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 13 Nov 2020 19:51:52 +0800 Subject: tools, bpftool: Add missing close before bpftool net attach exit progfd is created by prog_parse_fd() in do_attach() and before the latter returns in case of success, the file descriptor should be closed. Fixes: 04949ccc273e ("tools: bpftool: add net attach command to attach XDP on interface") Signed-off-by: Wang Hai Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201113115152.53178-1-wanghai38@huawei.com --- tools/bpf/bpftool/net.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 910e7bac6e9e..3fae61ef6339 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -578,8 +578,8 @@ static int do_attach(int argc, char **argv) ifindex = net_parse_dev(&argc, &argv); if (ifindex < 1) { - close(progfd); - return -EINVAL; + err = -EINVAL; + goto cleanup; } if (argc) { @@ -587,8 +587,8 @@ static int do_attach(int argc, char **argv) overwrite = true; } else { p_err("expected 'overwrite', got: '%s'?", *argv); - close(progfd); - return -EINVAL; + err = -EINVAL; + goto cleanup; } } @@ -596,17 +596,17 @@ static int do_attach(int argc, char **argv) if (is_prefix("xdp", attach_type_strings[attach_type])) err = do_attach_detach_xdp(progfd, attach_type, ifindex, overwrite); - - if (err < 0) { + if (err) { p_err("interface %s attach failed: %s", attach_type_strings[attach_type], strerror(-err)); - return err; + goto cleanup; } if (json_output) jsonw_null(json_wtr); - - return 0; +cleanup: + close(progfd); + return err; } static int do_detach(int argc, char **argv) -- cgit From f782e2c300a717233b64697affda3ea7aac00b2b Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Fri, 13 Nov 2020 17:17:56 +0000 Subject: bpf: Relax return code check for subprograms Currently verifier enforces return code checks for subprograms in the same manner as it does for program entry points. This prevents returning arbitrary scalar values from subprograms. Scalar type of returned values is checked by btf_prepare_func_args() and hence it should be safe to allow only scalars for now. Relax return code checks for subprograms and allow any correct scalar values. Fixes: 51c39bb1d5d10 (bpf: Introduce function-by-function verification) Signed-off-by: Dmitrii Banshchikov Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201113171756.90594-1-me@ubique.spb.ru --- .../selftests/bpf/prog_tests/test_global_funcs.c | 1 + tools/testing/selftests/bpf/progs/test_global_func8.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/test_global_func8.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 193002b14d7f..32e4348b714b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -60,6 +60,7 @@ void test_test_global_funcs(void) { "test_global_func5.o" , "expected pointer to ctx, but got PTR" }, { "test_global_func6.o" , "modified ctx ptr R2" }, { "test_global_func7.o" , "foo() doesn't return scalar" }, + { "test_global_func8.o" }, }; libbpf_print_fn_t old_print_fn = NULL; int err, i, duration = 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c new file mode 100644 index 000000000000..d55a6544b1ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func8.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include +#include +#include + +__noinline int foo(struct __sk_buff *skb) +{ + return bpf_get_prandom_u32(); +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + if (!foo(skb)) + return 0; + + return 1; +} -- cgit From 1c756cd429d8f3da33d31f2a970284b9d5260534 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Fri, 13 Nov 2020 20:38:26 +0000 Subject: perf inject: Fix file corruption due to event deletion "perf inject" can create corrupt files when synthesizing sample events from AUX data. This happens when in the input file, the first event (for the AUX data) has a different sample_type from the second event (generally dummy). Specifically, they differ in the bits that indicate the standard fields appended to perf records in the mmap buffer. "perf inject" deletes the first event and moves up the second event to first position. The problem is with the synthetic PERF_RECORD_MMAP (etc.) events created by "perf record". Since these are synthetic versions of events which are normally produced by the kernel, they have to have the standard fields appended as described by sample_type. "perf record" fills these in with zeroes, including the IDENTIFIER field; perf readers interpret records with zero IDENTIFIER using the descriptor for the first event in the file. Since "perf inject" changes the first event, these synthetic records are then processed with the wrong value of sample_type, and the perf reader reads bad data, reports on incorrect length records etc. Mismatching sample_types are seen with "perf record -e cs_etm//", where the AUX event has TID|TIME|CPU|IDENTIFIER and the dummy event has TID|TIME|IDENTIFIER. Perhaps they could be the same, but it isn't normally a problem if they aren't - perf has no problems reading the file. The sample_types have to agree on the position of IDENTIFIER, because that's how perf finds the right event descriptor in the first place, but they don't normally have to agree on other fields, and perf doesn't check that they do. The problem is specific to the way "perf inject" reorganizes the events and the way synthetic MMAP events are recorded with a zero identifier. A simple solution is to stop "perf inject" deleting the tracing event. Committer testing Removed the now unused 'evsel' variable, update the comment about the evsel removal not being performed anymore, and apply the patch manually as it failed with this warning: warning: Patch sent with format=flowed; space at the end of lines might be lost. Testing it with: $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.543 msec (+- 0.130 msec) Average time per event: 0.838 usec (+- 0.013 usec) Average memory usage: 12717 KB (+- 9 KB) Average build-id-all injection took: 5.710 msec (+- 0.058 msec) Average time per event: 0.560 usec (+- 0.006 usec) Average memory usage: 12079 KB (+- 7 KB) $ Signed-off-by: Al Grant Acked-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra LPU-Reference: b9cf5611-daae-2390-3439-6617f8f0a34b@foss.arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 452a75fe68e5..0462dc8db2e3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -779,25 +779,15 @@ static int __cmd_inject(struct perf_inject *inject) dsos__hit_all(session); /* * The AUX areas have been removed and replaced with - * synthesized hardware events, so clear the feature flag and - * remove the evsel. + * synthesized hardware events, so clear the feature flag. */ if (inject->itrace_synth_opts.set) { - struct evsel *evsel; - perf_header__clear_feat(&session->header, HEADER_AUXTRACE); if (inject->itrace_synth_opts.last_branch || inject->itrace_synth_opts.add_last_branch) perf_header__set_feat(&session->header, HEADER_BRANCH_STACK); - evsel = perf_evlist__id2evsel_strict(session->evlist, - inject->aux_id); - if (evsel) { - pr_debug("Deleting %s\n", evsel__name(evsel)); - evlist__remove(session->evlist, evsel); - evsel__delete(evsel); - } } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; -- cgit From 568beb27959b0515d325ea1c6cf211eed2d66740 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 13 Nov 2020 10:20:53 -0800 Subject: perf test: Avoid an msan warning in a copied stack. This fix is for a failure that occurred in the DWARF unwind perf test. Stack unwinders may probe memory when looking for frames. Memory sanitizer will poison and track uninitialized memory on the stack, and on the heap if the value is copied to the heap. This can lead to false memory sanitizer failures for the use of an uninitialized value. Avoid this problem by removing the poison on the copied stack. The full msan failure with track origins looks like: ==2168==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x559ceb10755b in handle_cfi elfutils/libdwfl/frame_unwind.c:648:8 #1 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #2 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #3 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #4 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #5 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #6 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #7 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #8 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #9 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #10 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #11 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #12 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #13 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #14 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #15 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #16 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #17 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #18 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #19 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #20 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #21 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #22 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #23 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceb106acf in __libdwfl_frame_reg_set elfutils/libdwfl/frame_unwind.c:77:22 #1 0x559ceb106acf in handle_cfi elfutils/libdwfl/frame_unwind.c:627:13 #2 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #3 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #4 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #5 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #6 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #7 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #8 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #9 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #10 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #11 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #12 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #13 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #14 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #15 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #16 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #17 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #18 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #19 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #20 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #21 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #22 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #23 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #24 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceb106a54 in handle_cfi elfutils/libdwfl/frame_unwind.c:613:9 #1 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #2 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #3 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #4 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #5 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #6 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #7 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #8 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #9 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #10 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #11 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #12 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #13 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #14 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #15 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #16 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #17 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #18 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #19 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #20 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #21 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #22 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #23 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceaff8800 in memory_read tools/perf/util/unwind-libdw.c:156:10 #1 0x559ceb10f053 in expr_eval elfutils/libdwfl/frame_unwind.c:501:13 #2 0x559ceb1060cc in handle_cfi elfutils/libdwfl/frame_unwind.c:603:18 #3 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #4 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #5 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #6 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #7 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #8 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #9 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #10 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #11 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #12 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #13 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #14 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #15 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #16 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #17 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #18 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #19 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #20 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #21 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #22 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #23 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #24 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #25 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559cea9027d9 in __msan_memcpy llvm/llvm-project/compiler-rt/lib/msan/msan_interceptors.cpp:1558:3 #1 0x559cea9d2185 in sample_ustack tools/perf/arch/x86/tests/dwarf-unwind.c:41:2 #2 0x559cea9d202c in test__arch_unwind_sample tools/perf/arch/x86/tests/dwarf-unwind.c:72:9 #3 0x559ceabc9cbd in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:106:6 #4 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #5 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #6 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #7 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #8 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #9 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #10 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #11 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #12 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #13 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #14 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #15 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #16 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #17 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was created by an allocation of 'bf' in the stack frame of function 'perf_event__synthesize_mmap_events' #0 0x559ceafc5f60 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:445 SUMMARY: MemorySanitizer: use-of-uninitialized-value elfutils/libdwfl/frame_unwind.c:648:8 in handle_cfi Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: clang-built-linux@googlegroups.com Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201113182053.754625-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/dwarf-unwind.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 4e40402a4f81..478078fb0f22 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -38,6 +38,13 @@ static int sample_ustack(struct perf_sample *sample, stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size; memcpy(buf, (void *) sp, stack_size); +#ifdef MEMORY_SANITIZER + /* + * Copying the stack may copy msan poison, avoid false positives in the + * unwinder by removing the poison here. + */ + __msan_unpoison(buf, stack_size); +#endif stack->data = (char *) buf; stack->size = stack_size; return 0; -- cgit From 2acc3c1bc8e98bc66b1badec42e9ea205b4fcdaa Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 16 Nov 2020 18:16:33 +0800 Subject: selftests/bpf: Fix error return code in run_getsockopt_test() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 65b4414a05eb ("selftests/bpf: add sockopt test that exercises BPF_F_ALLOW_MULTI") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201116101633.64627-1-wanghai38@huawei.com --- tools/testing/selftests/bpf/prog_tests/sockopt_multi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 29188d6f5c8d..51fac975b316 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -138,7 +138,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, */ buf = 0x40; - if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) { + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { log_err("Failed to call setsockopt(IP_TOS)"); goto detach; } -- cgit From ee415d73dcc24caef7f6bbf292dcc365613d2188 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 15 Nov 2020 14:06:23 +0200 Subject: tools/testing/scatterlist: Fix test to compile and run Add missing define of ALIGN_DOWN to make the test build and run. In addition, __sg_alloc_table_from_pages now support unaligned maximum segment, so adapt the test result accordingly. Fixes: 07da1223ec93 ("lib/scatterlist: Add support in dynamic allocation of SG table from pages") Link: https://lore.kernel.org/r/20201115120623.139113-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- tools/testing/scatterlist/linux/mm.h | 1 + tools/testing/scatterlist/main.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 6ae907f375d2..f9a12005fcea 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -33,6 +33,7 @@ typedef unsigned long dma_addr_t; #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index b2c7e9f7b8d3..f561aed7c657 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -52,9 +52,9 @@ int main(void) { const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; struct test *test, tests[] = { - { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, - { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, + { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, + { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 }, { 0, 1, pfn(0), 1, sgmax, 1 }, { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 }, -- cgit From fcb48454c23c5679d1a2e252f127642e91b05cbe Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Tue, 17 Nov 2020 16:59:11 +1100 Subject: selftests/powerpc: rfi_flush: disable entry flush if present We are about to add an entry flush. The rfi (exit) flush test measures the number of L1D flushes over a syscall with the RFI flush enabled and disabled. But if the entry flush is also enabled, the effect of enabling and disabling the RFI flush is masked. If there is a debugfs entry for the entry flush, disable it during the RFI flush and restore it later. Reported-by: Spoorthy S Signed-off-by: Russell Currey Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/security/rfi_flush.c | 35 ++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 93a65bd1f231..4e7b2776c367 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -85,19 +85,33 @@ int rfi_flush_test(void) __u64 l1d_misses_total = 0; unsigned long iterations = 100000, zero_size = 24 * 1024; unsigned long l1d_misses_expected; - int rfi_flush_org, rfi_flush; + int rfi_flush_orig, rfi_flush; + int have_entry_flush, entry_flush_orig; SKIP_IF(geteuid() != 0); // The PMU event we use only works on Power7 or later SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); - if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); } - rfi_flush = rfi_flush_org; + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + have_entry_flush = 0; + } else { + have_entry_flush = 1; + + if (entry_flush_orig != 0) { + if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + } + } + + rfi_flush = rfi_flush_orig; fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); FAIL_IF(fd < 0); @@ -106,6 +120,7 @@ int rfi_flush_test(void) FAIL_IF(perf_event_enable(fd)); + // disable L1 prefetching set_dscr(1); iter = repetitions; @@ -147,8 +162,8 @@ again: repetitions * l1d_misses_expected / 2, passes, repetitions); - if (rfi_flush == rfi_flush_org) { - rfi_flush = !rfi_flush_org; + if (rfi_flush == rfi_flush_orig) { + rfi_flush = !rfi_flush_orig; if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) { perror("error writing to powerpc/rfi_flush debugfs file"); return 1; @@ -164,11 +179,19 @@ again: set_dscr(0); - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) { + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { perror("unable to restore original value of powerpc/rfi_flush debugfs file"); return 1; } + if (have_entry_flush) { + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush " + "debugfs file"); + return 1; + } + } + return rc; } -- cgit From 89a83a0c69c81a25ce91002b90ca27ed86132a0a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 17 Nov 2020 16:59:14 +1100 Subject: selftests/powerpc: entry flush test Add a test modelled on the RFI flush test which counts the number of L1D misses doing a simple syscall with the entry flush on and off. For simplicity of backporting, this test duplicates a lot of code from rfi_flush. We clean that up in the next patch. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/security/.gitignore | 1 + tools/testing/selftests/powerpc/security/Makefile | 2 +- .../selftests/powerpc/security/entry_flush.c | 198 +++++++++++++++++++++ 3 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/security/entry_flush.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index f795e06f5ae3..4257a1f156bb 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only rfi_flush +entry_flush diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index eadbbff50be6..921152caf1dc 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush spectre_v2 +TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2 top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c new file mode 100644 index 000000000000..7ae7e37204c5 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" + +#define CACHELINE_SIZE 128 + +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +static void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +static void set_dscr(unsigned long val) +{ + static int init; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} + +int entry_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_orig; + int entry_flush, entry_flush_orig; + + SKIP_IF(geteuid() != 0); + + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (rfi_flush_orig != 0) { + if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + FAIL_IF(1); + } + } + + entry_flush = entry_flush_orig; + + fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + // disable L1 prefetching + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when entry_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (entry_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n", + entry_flush, l1d_misses_total, entry_flush ? '<' : '>', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else { + printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n", + entry_flush, l1d_misses_total, entry_flush ? '>' : '<', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + } + + if (entry_flush == entry_flush_orig) { + entry_flush = !entry_flush_orig; + if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(entry_flush_test, "entry_flush_test"); +} -- cgit From 0d239f3b03efc78fb5b290aff6c747fecd3b98cb Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 17 Nov 2020 16:59:15 +1100 Subject: selftests/powerpc: refactor entry and rfi_flush tests For simplicity in backporting, the original entry_flush test contained a lot of duplicated code from the rfi_flush test. De-duplicate that code. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/include/utils.h | 5 ++ tools/testing/selftests/powerpc/security/Makefile | 2 + .../selftests/powerpc/security/entry_flush.c | 61 +------------------ .../selftests/powerpc/security/flush_utils.c | 70 ++++++++++++++++++++++ .../selftests/powerpc/security/flush_utils.h | 17 ++++++ .../testing/selftests/powerpc/security/rfi_flush.c | 61 +------------------ 6 files changed, 96 insertions(+), 120 deletions(-) create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.c create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.h (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 052b5a775dc2..b7d188fc87c7 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -42,6 +42,11 @@ int perf_event_enable(int fd); int perf_event_disable(int fd); int perf_event_reset(int fd); +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + #if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30) #include #include diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 921152caf1dc..f25e854fe370 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -11,3 +11,5 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c $(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S +$(OUTPUT)/rfi_flush: flush_utils.c +$(OUTPUT)/entry_flush: flush_utils.c diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 7ae7e37204c5..78cf914fa321 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -15,66 +15,7 @@ #include #include #include "utils.h" - -#define CACHELINE_SIZE 128 - -struct perf_event_read { - __u64 nr; - __u64 l1d_misses; -}; - -static inline __u64 load(void *addr) -{ - __u64 tmp; - - asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); - - return tmp; -} - -static void syscall_loop(char *p, unsigned long iterations, - unsigned long zero_size) -{ - for (unsigned long i = 0; i < iterations; i++) { - for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) - load(p + j); - getppid(); - } -} - -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -static void set_dscr(unsigned long val) -{ - static int init; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} +#include "flush_utils.h" int entry_flush_test(void) { diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c new file mode 100644 index 000000000000..0c3c4c40c7fb --- /dev/null +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "flush_utils.h" + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +void set_dscr(unsigned long val) +{ + static int init; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h new file mode 100644 index 000000000000..07a5eb301466 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +/* + * Copyright 2018 IBM Corporation. + */ + +#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H +#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H + +#define CACHELINE_SIZE 128 + +void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size); + +void set_dscr(unsigned long val); + +#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */ diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 4e7b2776c367..7565fd786640 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -10,71 +10,12 @@ #include #include #include -#include #include #include #include #include "utils.h" +#include "flush_utils.h" -#define CACHELINE_SIZE 128 - -struct perf_event_read { - __u64 nr; - __u64 l1d_misses; -}; - -static inline __u64 load(void *addr) -{ - __u64 tmp; - - asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); - - return tmp; -} - -static void syscall_loop(char *p, unsigned long iterations, - unsigned long zero_size) -{ - for (unsigned long i = 0; i < iterations; i++) { - for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) - load(p + j); - getppid(); - } -} - -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned = 0; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -static void set_dscr(unsigned long val) -{ - static int init = 0; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} int rfi_flush_test(void) { -- cgit From 1fd6cee127e2ddff36d648573d7566aafb0d0b77 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 18 Nov 2020 22:13:50 +0100 Subject: libbpf: Fix VERSIONED_SYM_COUNT number parsing We remove "other info" from "readelf -s --wide" output when parsing GLOBAL_SYM_COUNT variable, which was added in [1]. But we don't do that for VERSIONED_SYM_COUNT and it's failing the check_abi target on powerpc Fedora 33. The extra "other info" wasn't problem for VERSIONED_SYM_COUNT parsing until commit [2] added awk in the pipe, which assumes that the last column is symbol, but it can be "other info". Adding "other info" removal for VERSIONED_SYM_COUNT the same way as we did for GLOBAL_SYM_COUNT parsing. [1] aa915931ac3e ("libbpf: Fix readelf output parsing for Fedora") [2] 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check") Fixes: 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check") Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201118211350.1493421-1-jolsa@kernel.org --- tools/lib/bpf/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 5f9abed3e226..55bd78b3496f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) @@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ -- cgit From c8a36aedf3e24768e94d87fdcdd37684bd241c44 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 17 Nov 2020 12:05:46 -0800 Subject: selftest/bpf: Test bpf_probe_read_user_str() strips trailing bytes after NUL Previously, bpf_probe_read_user_str() could potentially overcopy the trailing bytes after the NUL due to how do_strncpy_from_user() does the copy in long-sized strides. The issue has been fixed in the previous commit. This commit adds a selftest that ensures we don't regress bpf_probe_read_user_str() again. Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/4d977508fab4ec5b7b574b85bdf8b398868b6ee9.1605642949.git.dxu@dxuuu.xyz --- .../selftests/bpf/prog_tests/probe_read_user_str.c | 71 ++++++++++++++++++++++ .../selftests/bpf/progs/test_probe_read_user_str.c | 25 ++++++++ 2 files changed, 96 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c create mode 100644 tools/testing/selftests/bpf/progs/test_probe_read_user_str.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c new file mode 100644 index 000000000000..e419298132b5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "test_probe_read_user_str.skel.h" + +static const char str1[] = "mestring"; +static const char str2[] = "mestringalittlebigger"; +static const char str3[] = "mestringblubblubblubblubblub"; + +static int test_one_str(struct test_probe_read_user_str *skel, const char *str, + size_t len) +{ + int err, duration = 0; + char buf[256]; + + /* Ensure bytes after string are ones */ + memset(buf, 1, sizeof(buf)); + memcpy(buf, str, len); + + /* Give prog our userspace pointer */ + skel->bss->user_ptr = buf; + + /* Trigger tracepoint */ + usleep(1); + + /* Did helper fail? */ + if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n", + skel->bss->ret)) + return 1; + + /* Check that string was copied correctly */ + err = memcmp(skel->bss->buf, str, len); + if (CHECK(err, "memcmp", "prog copied wrong string")) + return 1; + + /* Now check that no extra trailing bytes were copied */ + memset(buf, 0, sizeof(buf)); + err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len); + if (CHECK(err, "memcmp", "trailing bytes were not stripped")) + return 1; + + return 0; +} + +void test_probe_read_user_str(void) +{ + struct test_probe_read_user_str *skel; + int err, duration = 0; + + skel = test_probe_read_user_str__open_and_load(); + if (CHECK(!skel, "test_probe_read_user_str__open_and_load", + "skeleton open and load failed\n")) + return; + + /* Give pid to bpf prog so it doesn't read from anyone else */ + skel->bss->pid = getpid(); + + err = test_probe_read_user_str__attach(skel); + if (CHECK(err, "test_probe_read_user_str__attach", + "skeleton attach failed: %d\n", err)) + goto out; + + if (test_one_str(skel, str1, sizeof(str1))) + goto out; + if (test_one_str(skel, str2, sizeof(str2))) + goto out; + if (test_one_str(skel, str3, sizeof(str3))) + goto out; + +out: + test_probe_read_user_str__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c new file mode 100644 index 000000000000..3ae398b75dcd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#include + +pid_t pid = 0; +long ret = 0; +void *user_ptr = 0; +char buf[256] = {}; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int on_write(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit From f5098e34dd4c774c3040e417960f1637e5daade8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Nov 2020 11:33:02 -0800 Subject: selftests/seccomp: powerpc: Fix typo in macro variable name A typo sneaked into the powerpc selftest. Fix the name so it builds again. Fixes: 46138329faea ("selftests/seccomp: powerpc: Fix seccomp return value testing") Acked-by: Michael Ellerman Link: https://lore.kernel.org/lkml/87y2ix2895.fsf@mpe.ellerman.id.au Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 4a180439ee9e..7f7ecfcd66db 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1758,10 +1758,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) * and the code is stored as a positive value. \ */ \ if (_result < 0) { \ - SYSCALL_RET(_regs) = -result; \ + SYSCALL_RET(_regs) = -_result; \ (_regs).ccr |= 0x10000000; \ } else { \ - SYSCALL_RET(_regs) = result; \ + SYSCALL_RET(_regs) = _result; \ (_regs).ccr &= ~0x10000000; \ } \ } while (0) -- cgit From 4c222f31fb1db4d590503a181a6268ced9252379 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Nov 2020 11:54:43 -0800 Subject: selftests/seccomp: sh: Fix register names It looks like the seccomp selftests was never actually built for sh. This fixes it, though I don't have an environment to do a runtime test of it yet. Fixes: 0bb605c2c7f2b4b3 ("sh: Add SECCOMP_FILTER") Tested-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/lkml/a36d7b48-6598-1642-e403-0c77a86f416d@physik.fu-berlin.de Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7f7ecfcd66db..26c72f2b61b1 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1804,8 +1804,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] #elif defined(__sh__) # define ARCH_REGS struct pt_regs -# define SYSCALL_NUM(_regs) (_regs).gpr[3] -# define SYSCALL_RET(_regs) (_regs).gpr[0] +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# define SYSCALL_RET(_regs) (_regs).regs[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif -- cgit From 3b13eaf0ba1d5ab59368e23ff5e5350f51c1a352 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Nov 2020 08:32:33 -0300 Subject: perf tools: Update copy of libbpf's hashmap.c To pick the changes in: 7a078d2d18801bba ("libbpf, hashmap: Fix undefined behavior in hash_bits") That don't entail any changes in tools/perf. This addresses this perf build warning: Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h' diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h Not a kernel ABI, its just that this uses the mechanism in place for checking kernel ABI files drift. Cc: Adrian Hunter Cc: Daniel Borkmann Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hashmap.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index d9b385fe808c..10a4c4cd13cf 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -15,6 +15,9 @@ static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ + if (bits == 0) + return 0; + #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) /* LP64 case */ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); @@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur; \ cur = cur->next) \ if (map->equal_fn(cur->key, (_key), map->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ if (map->equal_fn(cur->key, (_key), map->ctx)) -- cgit From 9713070028b9ab317f395ee130fa2c4ea741bab4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 24 Nov 2020 18:36:52 +0800 Subject: perf diff: Fix error return value in __cmd_diff() An appropriate return value should be set on the failed path. Fixes: 2a09a84c720b436a ("perf diff: Support hot streams comparison") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201124103652.438-1-thunder.leizhen@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 584e2e1a3793..cefc71506409 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1222,8 +1222,10 @@ static int __cmd_diff(void) if (compute == COMPUTE_STREAM) { d->evlist_streams = evlist__create_streams( d->session->evlist, 5); - if (!d->evlist_streams) + if (!d->evlist_streams) { + ret = -ENOMEM; goto out_delete; + } } } -- cgit From aa50d953c169e876413bf237319e728dd41d9fdd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Nov 2020 14:43:56 +0900 Subject: perf record: Synthesize cgroup events only if needed It didn't check the tool->cgroup_events bit which is set when the --all-cgroups option is given. Without it, samples will not have cgroup info so no reason to synthesize. We can check the PERF_RECORD_CGROUP records after running perf record *WITHOUT* the --all-cgroups option: Before: $ perf report -D | grep CGROUP 0 0 0x8430 [0x38]: PERF_RECORD_CGROUP cgroup: 1 / CGROUP events: 1 CGROUP events: 0 CGROUP events: 0 After: $ perf report -D | grep CGROUP CGROUP events: 0 CGROUP events: 0 CGROUP events: 0 Committer testing: Before: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.208 MB perf.data (10003 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 146 CGROUP events: 0 CGROUP events: 0 # After: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.208 MB perf.data (10448 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 0 CGROUP events: 0 CGROUP events: 0 # With all-cgroups: # perf record --all-cgroups -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.374 MB perf.data (11526 samples) ] # perf report -D | grep "CGROUP events" CGROUP events: 146 CGROUP events: 0 CGROUP events: 0 # Fixes: 8fb4b67939e16 ("perf record: Add --all-cgroups option") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201127054356.405481-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 8a23391558cf..d9c624377da7 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -563,6 +563,9 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool, char cgrp_root[PATH_MAX]; size_t mount_len; /* length of mount point in the path */ + if (!tool || !tool->cgroup_events) + return 0; + if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) { pr_debug("cannot find cgroup mount point\n"); return -1; -- cgit From c0ee1d5ae8c8650031badcfca6483a28c0f94f38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Nov 2020 13:14:03 +0900 Subject: perf stat: Use proper cpu for shadow stats Currently perf stat shows some metrics (like IPC) for defined events. But when no aggregation mode is used (-A option), it shows incorrect values since it used a value from a different cpu. Before: $ perf stat -aA -e cycles,instructions sleep 1 Performance counter stats for 'system wide': CPU0 116,057,380 cycles CPU1 86,084,722 cycles CPU2 99,423,125 cycles CPU3 98,272,994 cycles CPU0 53,369,217 instructions # 0.46 insn per cycle CPU1 33,378,058 instructions # 0.29 insn per cycle CPU2 58,150,086 instructions # 0.50 insn per cycle CPU3 40,029,703 instructions # 0.34 insn per cycle 1.001816971 seconds time elapsed So the IPC for CPU1 should be 0.38 (= 33,378,058 / 86,084,722) but it was 0.29 (= 33,378,058 / 116,057,380) and so on. After: $ perf stat -aA -e cycles,instructions sleep 1 Performance counter stats for 'system wide': CPU0 109,621,384 cycles CPU1 159,026,454 cycles CPU2 99,460,366 cycles CPU3 124,144,142 cycles CPU0 44,396,706 instructions # 0.41 insn per cycle CPU1 120,195,425 instructions # 0.76 insn per cycle CPU2 44,763,978 instructions # 0.45 insn per cycle CPU3 69,049,079 instructions # 0.56 insn per cycle 1.001910444 seconds time elapsed Fixes: 44d49a600259 ("perf stat: Support metrics in --per-core/socket mode") Reported-by: Sam Xi Signed-off-by: Namhyung Kim Reviewed-by: Andi Kleen Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201127041404.390276-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 4b57c0c07632..a963b5b8eb72 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -324,13 +324,10 @@ static int first_shadow_cpu(struct perf_stat_config *config, struct evlist *evlist = evsel->evlist; int i; - if (!config->aggr_get_id) - return 0; - if (config->aggr_mode == AGGR_NONE) return id; - if (config->aggr_mode == AGGR_GLOBAL) + if (!config->aggr_get_id) return 0; for (i = 0; i < evsel__nr_cpus(evsel); i++) { -- cgit From ab4200c17ba6fe71d2da64317aae8a8aa684624c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Nov 2020 14:48:46 +0900 Subject: perf probe: Fix to die_entrypc() returns error correctly Fix die_entrypc() to return error correctly if the DIE has no DW_AT_ranges attribute. Since dwarf_ranges() will treat the case as an empty ranges and return 0, we have to check it by ourselves. Fixes: 91e2f539eeda ("perf probe: Fix to show function entry line as probe-able") Signed-off-by: Masami Hiramatsu Cc: Sumanth Korikkar Cc: Thomas Richter Link: http://lore.kernel.org/lkml/160645612634.2824037.5284932731175079426.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index aa898014ad12..03c1a39c312a 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -373,6 +373,7 @@ bool die_is_func_def(Dwarf_Die *dw_die) int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) { Dwarf_Addr base, end; + Dwarf_Attribute attr; if (!addr) return -EINVAL; @@ -380,6 +381,13 @@ int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) if (dwarf_entrypc(dw_die, addr) == 0) return 0; + /* + * Since the dwarf_ranges() will return 0 if there is no + * DW_AT_ranges attribute, we should check it first. + */ + if (!dwarf_attr(dw_die, DW_AT_ranges, &attr)) + return -ENOENT; + return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0; } -- cgit From a9ffd0484eb4426e6befd07e7be6c01108716302 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Nov 2020 14:48:55 +0900 Subject: perf probe: Change function definition check due to broken DWARF Since some gcc generates a broken DWARF which lacks DW_AT_declaration attribute from the subprogram DIE of function prototype. (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97060) So, in addition to the DW_AT_declaration check, we also check the subprogram DIE has DW_AT_inline or actual entry pc. Committer testing: # cat /etc/fedora-release Fedora release 33 (Thirty Three) # Before: # perf test vfs_getname 78: Use vfs_getname probe to get syscall args filenames : FAILED! 79: Check open filename arg using perf trace + vfs_getname : FAILED! 81: Add vfs_getname probe to get syscall args filenames : FAILED! # After: # perf test vfs_getname 78: Use vfs_getname probe to get syscall args filenames : Ok 79: Check open filename arg using perf trace + vfs_getname : Ok 81: Add vfs_getname probe to get syscall args filenames : Ok # Reported-by: Thomas Richter Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Sumanth Korikkar Link: http://lore.kernel.org/lkml/160645613571.2824037.7441351537890235895.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 20 ++++++++++++++++++-- tools/perf/util/probe-finder.c | 3 +-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 03c1a39c312a..7b2d471a6419 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -356,9 +356,25 @@ bool die_is_signed_type(Dwarf_Die *tp_die) bool die_is_func_def(Dwarf_Die *dw_die) { Dwarf_Attribute attr; + Dwarf_Addr addr = 0; + + if (dwarf_tag(dw_die) != DW_TAG_subprogram) + return false; + + if (dwarf_attr(dw_die, DW_AT_declaration, &attr)) + return false; - return (dwarf_tag(dw_die) == DW_TAG_subprogram && - dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL); + /* + * DW_AT_declaration can be lost from function declaration + * by gcc's bug #97060. + * So we need to check this subprogram DIE has DW_AT_inline + * or an entry address. + */ + if (!dwarf_attr(dw_die, DW_AT_inline, &attr) && + die_entrypc(dw_die, &addr) < 0) + return false; + + return true; } /** diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 2c4061035f77..76dd349aa48d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1885,8 +1885,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) return DWARF_CB_OK; - if (die_is_func_def(sp_die) && - die_match_name(sp_die, lr->function)) { + if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) { lf->fname = dwarf_decl_file(sp_die); dwarf_decl_line(sp_die, &lr->offset); pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); -- cgit