diff options
Diffstat (limited to 'tools/testing')
251 files changed, 16375 insertions, 1378 deletions
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index a5f7fdd0c1fb..001c4df9f7df 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -21,6 +21,8 @@ my %opt; my %repeat_tests; my %repeats; my %evals; +my @command_vars; +my %command_tmp_vars; #default opts my %default = ( @@ -216,6 +218,7 @@ my $patchcheck_type; my $patchcheck_start; my $patchcheck_cherry; my $patchcheck_end; +my $patchcheck_skip; my $build_time; my $install_time; @@ -380,6 +383,7 @@ my %option_map = ( "PATCHCHECK_START" => \$patchcheck_start, "PATCHCHECK_CHERRY" => \$patchcheck_cherry, "PATCHCHECK_END" => \$patchcheck_end, + "PATCHCHECK_SKIP" => \$patchcheck_skip, ); # Options may be used by other options, record them. @@ -900,14 +904,22 @@ sub set_eval { } sub set_variable { - my ($lvalue, $rvalue) = @_; + my ($lvalue, $rvalue, $command) = @_; + # Command line variables override all others + if (defined($command_tmp_vars{$lvalue})) { + return; + } if ($rvalue =~ /^\s*$/) { delete $variable{$lvalue}; } else { $rvalue = process_variables($rvalue); $variable{$lvalue} = $rvalue; } + + if (defined($command)) { + $command_tmp_vars{$lvalue} = 1; + } } sub process_compare { @@ -1286,6 +1298,19 @@ sub read_config { $test_case = __read_config $config, \$test_num; + foreach my $val (@command_vars) { + chomp $val; + my %command_overrides; + if ($val =~ m/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) { + my $lvalue = $1; + my $rvalue = $2; + + set_value($lvalue, $rvalue, 1, \%command_overrides, "COMMAND LINE"); + } else { + die "Invalid option definition '$val'\n"; + } + } + # make sure we have all mandatory configs get_mandatory_configs; @@ -1371,7 +1396,10 @@ sub __eval_option { # If a variable contains itself, use the default var if (($var eq $name) && defined($opt{$var})) { $o = $opt{$var}; - $retval = "$retval$o"; + # Only append if the default doesn't contain itself + if ($o !~ m/\$\{$var\}/) { + $retval = "$retval$o"; + } } elsif (defined($opt{$o})) { $o = 
$opt{$o}; $retval = "$retval$o"; @@ -3511,11 +3539,37 @@ sub patchcheck { @list = reverse @list; } + my %skip_list; + my $will_skip = 0; + + if (defined($patchcheck_skip)) { + foreach my $s (split /\s+/, $patchcheck_skip) { + $s = `git log --pretty=oneline $s~1..$s`; + $s =~ s/^(\S+).*/$1/; + chomp $s; + $skip_list{$s} = 1; + $will_skip++; + } + } + doprint("Going to test the following commits:\n"); foreach my $l (@list) { + my $sha1 = $l; + $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + next if (defined($skip_list{$sha1})); doprint "$l\n"; } + if ($will_skip) { + doprint("\nSkipping the following commits:\n"); + foreach my $l (@list) { + my $sha1 = $l; + $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + next if (!defined($skip_list{$sha1})); + doprint "$l\n"; + } + } + my $save_clean = $noclean; my %ignored_warnings; @@ -3530,6 +3584,11 @@ sub patchcheck { my $sha1 = $item; $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + if (defined($skip_list{$sha1})) { + doprint "\nSkipping \"$item\"\n\n"; + next; + } + doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or @@ -4242,8 +4301,55 @@ sub cancel_test { die "\nCaught Sig Int, test interrupted: $!\n" } -$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; +sub die_usage { + die << "EOF" +ktest.pl version: $VERSION + usage: ktest.pl [options] [config-file] + [options]: + -D value: Where value can act as an option override. + -D BUILD_NOCLEAN=1 + Sets global BUILD_NOCLEAN to 1 + -D TEST_TYPE[2]=build + Sets TEST_TYPE of test 2 to "build" + + It can also override all temp variables. 
+ -D USE_TEMP_DIR:=1 + Will override all variables that use + "USE_TEMP_DIR=" + +EOF +; +} + +while ( $#ARGV >= 0 ) { + if ( $ARGV[0] eq "-D" ) { + shift; + die_usage if ($#ARGV < 1); + my $val = shift; + + if ($val =~ m/(.*?):=(.*)$/) { + set_variable($1, $2, 1); + } else { + $command_vars[$#command_vars + 1] = $val; + } + + } elsif ( $ARGV[0] =~ m/^-D(.*)/) { + my $val = $1; + shift; + + if ($val =~ m/(.*?):=(.*)$/) { + set_variable($1, $2, 1); + } else { + $command_vars[$#command_vars + 1] = $val; + } + } elsif ( $ARGV[0] eq "-h" ) { + die_usage; + } else { + last; + } +} +$#ARGV < 1 or die_usage; if ($#ARGV == 0) { $ktest_config = $ARGV[0]; if (! -f $ktest_config) { @@ -4466,6 +4572,10 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n"; + # Always show which build directory and output directory is being used + doprint "BUILD_DIR=$builddir\n"; + doprint "OUTPUT_DIR=$outputdir\n\n"; + if (defined($pre_test)) { my $ret = run_command $pre_test; if (!$ret && defined($pre_test_die) && diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index f43477a9b857..9c4c449a8f3e 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -1017,6 +1017,8 @@ # Note, PATCHCHECK_CHERRY requires PATCHCHECK_END to be defined. # (default 0) # +# PATCHCHECK_SKIP is an optional list of shas to skip testing +# # PATCHCHECK_TYPE is required and is the type of test to run: # build, boot, test. 
# diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index a6d30c620908..483488f8c2ad 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -12,4 +12,4 @@ $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S $(OUTPUT)/tpidr2: tpidr2.c $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ -static -include ../../../../include/nolibc/nolibc.h \ - -ffreestanding -Wall $^ -o $@ -lgcc + -I../.. -ffreestanding -Wall $^ -o $@ -lgcc diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 35f521e5f41c..002ec38a8bbb 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -21,6 +21,10 @@ #define TESTS_PER_HWCAP 3 +#ifndef AT_HWCAP3 +#define AT_HWCAP3 29 +#endif + /* * Function expected to generate exception when the feature is not * supported and return when it is supported. If the specific exception @@ -1098,6 +1102,18 @@ static const struct hwcap_data { .sigill_fn = hbc_sigill, .sigill_reliable = true, }, + { + .name = "MTE_FAR", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_MTE_FAR, + .cpuinfo = "mtefar", + }, + { + .name = "MTE_STOREONLY", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_MTE_STORE_ONLY, + .cpuinfo = "mtestoreonly", + }, }; typedef void (*sighandler_fn)(int, siginfo_t *, void *); diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index eb19dcc37a75..f58a9f89b952 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -3,31 +3,12 @@ #include <linux/sched.h> #include <linux/wait.h> +#include "kselftest.h" + #define SYS_TPIDR2 "S3_3_C13_C0_5" #define EXPECTED_TESTS 5 -static void putstr(const char *str) -{ - write(1, str, strlen(str)); -} - -static void putnum(unsigned int num) -{ - char c; - - if (num / 10) - putnum(num / 10); - - c = '0' + (num 
% 10); - write(1, &c, 1); -} - -static int tests_run; -static int tests_passed; -static int tests_failed; -static int tests_skipped; - static void set_tpidr2(uint64_t val) { asm volatile ( @@ -50,20 +31,6 @@ static uint64_t get_tpidr2(void) return val; } -static void print_summary(void) -{ - if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS) - putstr("# UNEXPECTED TEST COUNT: "); - - putstr("# Totals: pass:"); - putnum(tests_passed); - putstr(" fail:"); - putnum(tests_failed); - putstr(" xfail:0 xpass:0 skip:"); - putnum(tests_skipped); - putstr(" error:0\n"); -} - /* Processes should start with TPIDR2 == 0 */ static int default_value(void) { @@ -105,9 +72,8 @@ static int write_fork_read(void) if (newpid == 0) { /* In child */ if (get_tpidr2() != oldpid) { - putstr("# TPIDR2 changed in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 changed in child: %llx\n", + get_tpidr2()); exit(0); } @@ -115,14 +81,12 @@ static int write_fork_read(void) if (get_tpidr2() == getpid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } if (newpid < 0) { - putstr("# fork() failed: -"); - putnum(-newpid); - putstr("\n"); + ksft_print_msg("fork() failed: %d\n", newpid); return 0; } @@ -132,23 +96,22 @@ static int write_fork_read(void) if (waiting < 0) { if (errno == EINTR) continue; - putstr("# waitpid() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("waitpid() failed: %d\n", errno); return 0; } if (waiting != newpid) { - putstr("# waitpid() returned wrong PID\n"); + ksft_print_msg("waitpid() returned wrong PID: %d != %d\n", + waiting, newpid); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (getpid() != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -188,35 +151,32 @@ static int 
write_clone_read(void) stack = malloc(__STACK_SIZE); if (!stack) { - putstr("# malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); return 0; } ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE, &parent_tid, 0, &child_tid); if (ret == -1) { - putstr("# clone() failed\n"); - putnum(errno); - putstr("\n"); + ksft_print_msg("clone() failed: %d\n", errno); return 0; } if (ret == 0) { /* In child */ if (get_tpidr2() != 0) { - putstr("# TPIDR2 non-zero in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 non-zero in child: %llx\n", + get_tpidr2()); exit(0); } if (gettid() == 0) - putstr("# Child TID==0\n"); + ksft_print_msg("Child TID==0\n"); set_tpidr2(gettid()); if (get_tpidr2() == gettid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } @@ -227,25 +187,22 @@ static int write_clone_read(void) if (waiting < 0) { if (errno == EINTR) continue; - putstr("# wait4() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("wait4() failed: %d\n", errno); return 0; } if (waiting != ret) { - putstr("# wait4() returned wrong PID "); - putnum(waiting); - putstr("\n"); + ksft_print_msg("wait4() returned wrong PID %d\n", + waiting); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (parent != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -253,35 +210,14 @@ static int write_clone_read(void) } } -#define run_test(name) \ - if (name()) { \ - tests_passed++; \ - } else { \ - tests_failed++; \ - putstr("not "); \ - } \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" " #name "\n"); - -#define skip_test(name) \ - tests_skipped++; \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" # SKIP " #name "\n"); - int main(int argc, char **argv) { int ret; - putstr("TAP version 13\n"); - 
putstr("1.."); - putnum(EXPECTED_TESTS); - putstr("\n"); + ksft_print_header(); + ksft_set_plan(5); - putstr("# PID: "); - putnum(getpid()); - putstr("\n"); + ksft_print_msg("PID: %d\n", getpid()); /* * This test is run with nolibc which doesn't support hwcap and @@ -290,23 +226,21 @@ int main(int argc, char **argv) */ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { - run_test(default_value); - run_test(write_read); - run_test(write_sleep_read); - run_test(write_fork_read); - run_test(write_clone_read); + ksft_test_result(default_value(), "default_value\n"); + ksft_test_result(write_read, "write_read\n"); + ksft_test_result(write_sleep_read, "write_sleep_read\n"); + ksft_test_result(write_fork_read, "write_fork_read\n"); + ksft_test_result(write_clone_read, "write_clone_read\n"); } else { - putstr("# SME support not present\n"); + ksft_print_msg("SME support not present\n"); - skip_test(default_value); - skip_test(write_read); - skip_test(write_sleep_read); - skip_test(write_fork_read); - skip_test(write_clone_read); + ksft_test_result_skip("default_value\n"); + ksft_test_result_skip("write_read\n"); + ksft_test_result_skip("write_sleep_read\n"); + ksft_test_result_skip("write_fork_read\n"); + ksft_test_result_skip("write_clone_read\n"); } - print_summary(); - - return 0; + ksft_finished(); } diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 191c47ca0ed8..124bc883365e 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1061,11 +1061,31 @@ static bool sve_write_supported(struct test_config *config) if (config->sme_vl_in != config->sme_vl_expected) { return false; } + + if (!sve_supported()) + return false; } return true; } +static bool sve_write_fpsimd_supported(struct test_config *config) +{ + if (!sve_supported()) + return false; + + if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA)) + 
return false; + + if (config->svcr_expected & SVCR_SM) + return false; + + if (config->sme_vl_in != config->sme_vl_expected) + return false; + + return true; +} + static void fpsimd_write_expected(struct test_config *config) { int vl; @@ -1134,6 +1154,9 @@ static void sve_write_expected(struct test_config *config) int vl = vl_expected(config); int sme_vq = __sve_vq_from_vl(config->sme_vl_expected); + if (!vl) + return; + fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl))); fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl))); @@ -1152,7 +1175,7 @@ static void sve_write_expected(struct test_config *config) } } -static void sve_write(pid_t child, struct test_config *config) +static void sve_write_sve(pid_t child, struct test_config *config) { struct user_sve_header *sve; struct iovec iov; @@ -1161,6 +1184,9 @@ static void sve_write(pid_t child, struct test_config *config) vl = vl_expected(config); vq = __sve_vq_from_vl(vl); + if (!vl) + return; + iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { @@ -1195,6 +1221,45 @@ static void sve_write(pid_t child, struct test_config *config) free(iov.iov_base); } +static void sve_write_fpsimd(pid_t child, struct test_config *config) +{ + struct user_sve_header *sve; + struct user_fpsimd_state *fpsimd; + struct iovec iov; + int ret, vl, vq; + + vl = vl_expected(config); + vq = __sve_vq_from_vl(vl); + + if (!vl) + return; + + iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, + SVE_PT_REGS_FPSIMD); + iov.iov_base = malloc(iov.iov_len); + if (!iov.iov_base) { + ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", + iov.iov_len); + return; + } + memset(iov.iov_base, 0, iov.iov_len); + + sve = iov.iov_base; + sve->size = iov.iov_len; + sve->flags = SVE_PT_REGS_FPSIMD; + sve->vl = vl; + + fpsimd = iov.iov_base + SVE_PT_REGS_OFFSET; + memcpy(&fpsimd->vregs, v_expected, sizeof(v_expected)); + + ret = 
ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov); + if (ret != 0) + ksft_print_msg("Failed to write SVE: %s (%d)\n", + strerror(errno), errno); + + free(iov.iov_base); +} + static bool za_write_supported(struct test_config *config) { if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM)) @@ -1386,7 +1451,13 @@ static struct test_definition sve_test_defs[] = { .name = "SVE write", .supported = sve_write_supported, .set_expected_values = sve_write_expected, - .modify_values = sve_write, + .modify_values = sve_write_sve, + }, + { + .name = "SVE write FPSIMD format", + .supported = sve_write_fpsimd_supported, + .set_expected_values = fpsimd_write_expected, + .modify_values = sve_write_fpsimd, }, }; @@ -1607,7 +1678,7 @@ int main(void) * Run the test set if there is no SVE or SME, with those we * have to pick a VL for each run. */ - if (!sve_supported()) { + if (!sve_supported() && !sme_supported()) { test_config.sve_vl_in = 0; test_config.sve_vl_expected = 0; test_config.sme_vl_in = 0; diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 577b6e05e860..b22303778fb0 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -170,7 +170,7 @@ static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type) memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); sve.vl = sve_vl_from_vq(SVE_VQ_MIN); - sve.flags = SVE_PT_VL_INHERIT; + sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE; ret = set_sve(child, type, &sve); if (ret != 0) { ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n", @@ -235,6 +235,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, /* Set the VL by doing a set with no register payload */ memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); + sve.flags = SVE_PT_REGS_SVE; sve.vl = vl; ret = set_sve(child, type, &sve); if (ret != 0) { @@ -253,7 +254,7 @@ static void 
ptrace_set_get_vl(pid_t child, const struct vec_type *type, return; } - ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n", + ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n", type->name, vl); free(new_sve); @@ -301,8 +302,10 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) p[j] = j; } + /* This should only succeed for SVE */ ret = set_sve(child, type, sve); - ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n", + ksft_test_result((type->regset == NT_ARM_SVE) == (ret == 0), + "%s FPSIMD set via SVE: %d\n", type->name, ret); if (ret) goto out; @@ -750,9 +753,6 @@ int main(void) ksft_print_header(); ksft_set_plan(EXPECTED_TESTS); - if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) - ksft_exit_skip("SVE not available\n"); - child = fork(); if (!child) return do_child(); diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 2ee7f114d7fa..ff4e07503349 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -31,7 +31,7 @@ static int check_buffer_by_byte(int mem_type, int mode) int i, j, item; bool err; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { @@ -68,7 +68,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, bool err; char *und_ptr = NULL; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, @@ -164,7 +164,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode, size_t tagged_size, overflow_size; char *over_ptr = NULL; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; 
i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, @@ -337,7 +337,7 @@ static int check_buffer_by_block(int mem_type, int mode) { int i, item, result = KSFT_PASS; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); cur_mte_cxt.fault_valid = false; for (i = 0; i < item; i++) { @@ -368,7 +368,7 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping) int run, fd; int total = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { /* check initial tags for anonymous mmap */ ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false); @@ -415,7 +415,7 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(20); diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 7597fc632cad..5e97ee792e4d 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -88,7 +88,7 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping) int item = ARRAY_SIZE(sizes); item = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < item; run++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, UNDERFLOW, OVERFLOW); @@ -109,7 +109,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping) int run, fd, map_size, result = KSFT_PASS; int total = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; 
run++) { fd = create_temp_file(); if (fd == -1) @@ -160,8 +160,8 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); - mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); + mte_register_signal(SIGBUS, mte_default_handler, false); /* Set test plan */ ksft_set_plan(12); diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c index 3bfcd3848432..aad1234c7e0f 100644 --- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -151,7 +151,7 @@ static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int map_size = default_huge_page_size(); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) return KSFT_FAIL; @@ -180,7 +180,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) unsigned long map_size; prot_flag = PROT_READ | PROT_WRITE; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); map_size = default_huge_page_size(); map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, 0, 0); @@ -210,7 +210,7 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin map_size = default_huge_page_size(); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, 0, 0); if (check_allocated_memory_range(ptr, map_size, mem_type, @@ -235,8 +235,8 @@ int main(int argc, char *argv[]) return err; /* Register signal handlers */ - 
mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); allocate_hugetlb(); diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index 88c74bc46d4f..0cf5faef1724 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -106,7 +106,7 @@ static int check_madvise_options(int mem_type, int mode, int mapping) return err; } - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true); if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS) return KSFT_FAIL; @@ -141,8 +141,8 @@ int main(int argc, char *argv[]) return KSFT_FAIL; } /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(4); diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 17694caaff53..c100af3012cb 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE +#include <assert.h> #include <errno.h> #include <fcntl.h> #include <signal.h> @@ -23,6 +24,35 @@ #define OVERFLOW MT_GRANULE_SIZE #define TAG_CHECK_ON 0 #define TAG_CHECK_OFF 1 +#define ATAG_CHECK_ON 1 +#define ATAG_CHECK_OFF 0 + +#define TEST_NAME_MAX 256 + +enum mte_mem_check_type { + CHECK_ANON_MEM = 0, + CHECK_FILE_MEM = 1, + CHECK_CLEAR_PROT_MTE = 2, +}; + +enum mte_tag_op_type { + TAG_OP_ALL 
= 0, + TAG_OP_STONLY = 1, +}; + +struct check_mmap_testcase { + int check_type; + int mem_type; + int mte_sync; + int mapping; + int tag_check; + int atag_check; + int tag_op; + bool enable_tco; +}; + +#define TAG_OP_ALL 0 +#define TAG_OP_STONLY 1 static size_t page_size; static int sizes[] = { @@ -30,8 +60,17 @@ static int sizes[] = { /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 }; -static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +static int check_mte_memory(char *ptr, int size, int mode, + int tag_check,int atag_check, int tag_op) { + char buf[MT_GRANULE_SIZE]; + + if (!mtefar_support && atag_check == ATAG_CHECK_ON) + return KSFT_SKIP; + + if (atag_check == ATAG_CHECK_ON) + ptr = mte_insert_atag(ptr); + mte_initialize_current_context(mode, (uintptr_t)ptr, size); memset(ptr, '1', size); mte_wait_after_trig(); @@ -54,16 +93,34 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check) if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) return KSFT_FAIL; + if (tag_op == TAG_OP_STONLY) { + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memcpy(buf, ptr - UNDERFLOW, MT_GRANULE_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memcpy(buf, ptr + size, MT_GRANULE_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + } + return KSFT_PASS; } -static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, + int tag_check, int atag_check, int tag_op) { char *ptr, *map_ptr; int run, result, map_size; int item = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + if (tag_op == TAG_OP_STONLY && !mtestonly_support) + return KSFT_SKIP; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op); for (run = 
0; run < item; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); @@ -79,23 +136,27 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i munmap((void *)map_ptr, map_size); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op); mte_clear_tags((void *)ptr, sizes[run]); mte_free_memory((void *)map_ptr, map_size, mem_type, false); - if (result == KSFT_FAIL) - return KSFT_FAIL; + if (result != KSFT_PASS) + return result; } return KSFT_PASS; } -static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +static int check_file_memory_mapping(int mem_type, int mode, int mapping, + int tag_check, int atag_check, int tag_op) { char *ptr, *map_ptr; int run, fd, map_size; int total = ARRAY_SIZE(sizes); int result = KSFT_PASS; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + if (tag_op == TAG_OP_STONLY && !mtestonly_support) + return KSFT_SKIP; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op); for (run = 0; run < total; run++) { fd = create_temp_file(); if (fd == -1) @@ -117,24 +178,24 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op); mte_clear_tags((void *)ptr, sizes[run]); munmap((void *)map_ptr, map_size); close(fd); - if (result == KSFT_FAIL) - break; + if (result != KSFT_PASS) + return result; } - return result; + return KSFT_PASS; } -static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int atag_check) { char *ptr, *map_ptr; int run, prot_flag, result, fd, map_size; int total = ARRAY_SIZE(sizes); prot_flag = 
PROT_READ | PROT_WRITE; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, @@ -150,10 +211,10 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); if (result != KSFT_PASS) - return KSFT_FAIL; + return result; fd = create_temp_file(); if (fd == -1) @@ -174,19 +235,715 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); close(fd); if (result != KSFT_PASS) - return KSFT_FAIL; + return result; } return KSFT_PASS; } +const char *format_test_name(struct check_mmap_testcase *tc) +{ + static char test_name[TEST_NAME_MAX]; + const char *check_type_str; + const char *mem_type_str; + const char *sync_str; + const char *mapping_str; + const char *tag_check_str; + const char *atag_check_str; + const char *tag_op_str; + + switch (tc->check_type) { + case CHECK_ANON_MEM: + check_type_str = "anonymous memory"; + break; + case CHECK_FILE_MEM: + check_type_str = "file memory"; + break; + case CHECK_CLEAR_PROT_MTE: + check_type_str = "clear PROT_MTE flags"; + break; + default: + assert(0); + break; + } + + switch (tc->mem_type) { + case USE_MMAP: + mem_type_str = "mmap"; + break; + case USE_MPROTECT: + mem_type_str = "mmap/mprotect"; + 
break; + default: + assert(0); + break; + } + + switch (tc->mte_sync) { + case MTE_NONE_ERR: + sync_str = "no error"; + break; + case MTE_SYNC_ERR: + sync_str = "sync error"; + break; + case MTE_ASYNC_ERR: + sync_str = "async error"; + break; + default: + assert(0); + break; + } + + switch (tc->mapping) { + case MAP_SHARED: + mapping_str = "shared"; + break; + case MAP_PRIVATE: + mapping_str = "private"; + break; + default: + assert(0); + break; + } + + switch (tc->tag_check) { + case TAG_CHECK_ON: + tag_check_str = "tag check on"; + break; + case TAG_CHECK_OFF: + tag_check_str = "tag check off"; + break; + default: + assert(0); + break; + } + + switch (tc->atag_check) { + case ATAG_CHECK_ON: + atag_check_str = "with address tag [63:60]"; + break; + case ATAG_CHECK_OFF: + atag_check_str = "without address tag [63:60]"; + break; + default: + assert(0); + break; + } + + snprintf(test_name, sizeof(test_name), + "Check %s with %s mapping, %s mode, %s memory and %s (%s)\n", + check_type_str, mapping_str, sync_str, mem_type_str, + tag_check_str, atag_check_str); + + switch (tc->tag_op) { + case TAG_OP_ALL: + tag_op_str = ""; + break; + case TAG_OP_STONLY: + tag_op_str = " / store-only"; + break; + default: + assert(0); + break; + } + + snprintf(test_name, TEST_NAME_MAX, + "Check %s with %s mapping, %s mode, %s memory and %s (%s%s)\n", + check_type_str, mapping_str, sync_str, mem_type_str, + tag_check_str, atag_check_str, tag_op_str); + + return test_name; +} + int main(int argc, char *argv[]) { - int err; + int err, i; int item = ARRAY_SIZE(sizes); + struct check_mmap_testcase test_cases[]= { + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = true, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + 
.atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = true, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_NONE_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_NONE_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = 
MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + 
.check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = 
ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + 
.mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { 
+ .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check 
= ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + }; err = mte_default_setup(); if (err) @@ -200,64 +957,51 @@ int main(int argc, char *argv[]) sizes[item - 2] = page_size; sizes[item - 1] = page_size + 1; - /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); - /* Set test plan */ - ksft_set_plan(22); - - mte_enable_pstate_tco(); - - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n"); - - mte_disable_pstate_tco(); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check anonymous memory with private mapping, no error mode, mmap memory and tag check 
off\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n"); - - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); - - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, sync error 
mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); - - evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), - "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); - evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), - "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + ksft_set_plan(ARRAY_SIZE(test_cases)); + + for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) { + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler, + test_cases[i].atag_check == ATAG_CHECK_ON); + 
mte_register_signal(SIGSEGV, mte_default_handler, + test_cases[i].atag_check == ATAG_CHECK_ON); + + if (test_cases[i].enable_tco) + mte_enable_pstate_tco(); + else + mte_disable_pstate_tco(); + + switch (test_cases[i].check_type) { + case CHECK_ANON_MEM: + evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].tag_check, + test_cases[i].atag_check, + test_cases[i].tag_op), + format_test_name(&test_cases[i])); + break; + case CHECK_FILE_MEM: + evaluate_test(check_file_memory_mapping(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].tag_check, + test_cases[i].atag_check, + test_cases[i].tag_op), + format_test_name(&test_cases[i])); + break; + case CHECK_CLEAR_PROT_MTE: + evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].atag_check), + format_test_name(&test_cases[i])); + break; + default: + exit(KSFT_FAIL); + } + } mte_restore_setup(); ksft_print_cnts(); diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c index 4c89e9538ca0..f7f320defa7b 100644 --- a/tools/testing/selftests/arm64/mte/check_prctl.c +++ b/tools/testing/selftests/arm64/mte/check_prctl.c @@ -12,6 +12,10 @@ #include "kselftest.h" +#ifndef AT_HWCAP3 +#define AT_HWCAP3 29 +#endif + static int set_tagged_addr_ctrl(int val) { int ret; @@ -60,7 +64,7 @@ void check_basic_read(void) /* * Attempt to set a specified combination of modes. 
*/ -void set_mode_test(const char *name, int hwcap2, int mask) +void set_mode_test(const char *name, int hwcap2, int hwcap3, int mask) { int ret; @@ -69,6 +73,11 @@ void set_mode_test(const char *name, int hwcap2, int mask) return; } + if ((getauxval(AT_HWCAP3) & hwcap3) != hwcap3) { + ksft_test_result_skip("%s\n", name); + return; + } + ret = set_tagged_addr_ctrl(mask); if (ret < 0) { ksft_test_result_fail("%s\n", name); @@ -81,7 +90,7 @@ void set_mode_test(const char *name, int hwcap2, int mask) return; } - if ((ret & PR_MTE_TCF_MASK) == mask) { + if ((ret & (PR_MTE_TCF_MASK | PR_MTE_STORE_ONLY)) == mask) { ksft_test_result_pass("%s\n", name); } else { ksft_print_msg("Got %x, expected %x\n", @@ -93,12 +102,16 @@ void set_mode_test(const char *name, int hwcap2, int mask) struct mte_mode { int mask; int hwcap2; + int hwcap3; const char *name; } mte_modes[] = { - { PR_MTE_TCF_NONE, 0, "NONE" }, - { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" }, - { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" }, - { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" }, + { PR_MTE_TCF_NONE, 0, 0, "NONE" }, + { PR_MTE_TCF_SYNC, HWCAP2_MTE, 0, "SYNC" }, + { PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "ASYNC" }, + { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "SYNC+ASYNC" }, + { PR_MTE_TCF_SYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+STONLY" }, + { PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "ASYNC+STONLY" }, + { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+ASYNC+STONLY" }, }; int main(void) @@ -106,11 +119,11 @@ int main(void) int i; ksft_print_header(); - ksft_set_plan(5); + ksft_set_plan(ARRAY_SIZE(mte_modes)); check_basic_read(); for (i = 0; i < ARRAY_SIZE(mte_modes); i++) - set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, + set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, mte_modes[i].hwcap3, mte_modes[i].mask); ksft_print_cnts(); diff --git 
a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index a3d1e23fe02a..4b764f2a8185 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -57,7 +57,7 @@ static int check_single_included_tags(int mem_type, int mode) return KSFT_FAIL; for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) { - ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag), false); if (ret != 0) result = KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ @@ -91,7 +91,7 @@ static int check_multiple_included_tags(int mem_type, int mode) for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) { excl_mask |= 1 << tag; - mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask)); + mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask), false); /* Try to catch a excluded tag by a number of tries. */ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { ptr = mte_insert_tags(ptr, BUFFER_SIZE); @@ -120,7 +120,7 @@ static int check_all_included_tags(int mem_type, int mode) mem_type, false) != KSFT_PASS) return KSFT_FAIL; - ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK, false); if (ret != 0) return KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ @@ -145,7 +145,7 @@ static int check_none_included_tags(int mem_type, int mode) if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS) return KSFT_FAIL; - ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK, false); if (ret != 0) return KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. 
*/ @@ -180,7 +180,7 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(4); diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index f4ae5f87a3b7..fb7936c4e097 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -44,7 +44,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, err = KSFT_PASS; len = 2 * page_sz; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); fd = create_temp_file(); if (fd == -1) return KSFT_FAIL; @@ -211,7 +211,7 @@ int main(int argc, char *argv[]) return err; /* Register signal handlers */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(64); diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index a1dc2fe5285b..397e57dd946a 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -6,6 +6,7 @@ #include <signal.h> #include <stdio.h> #include <stdlib.h> +#include <time.h> #include <unistd.h> #include <linux/auxvec.h> @@ -19,20 +20,40 @@ #include "mte_common_util.h" #include "mte_def.h" +#ifndef SA_EXPOSE_TAGBITS +#define SA_EXPOSE_TAGBITS 0x00000800 +#endif + #define INIT_BUFFER_SIZE 256 struct mte_fault_cxt cur_mte_cxt; +bool mtefar_support; +bool mtestonly_support; static unsigned int mte_cur_mode; static unsigned int mte_cur_pstate_tco; +static bool mte_cur_stonly; void mte_default_handler(int signum, siginfo_t *si, void *uc) { + struct sigaction sa; unsigned long addr = (unsigned long)si->si_addr; + unsigned char si_tag, 
si_atag; + + sigaction(signum, NULL, &sa); + + if (sa.sa_flags & SA_EXPOSE_TAGBITS) { + si_tag = MT_FETCH_TAG(addr); + si_atag = MT_FETCH_ATAG(addr); + addr = MT_CLEAR_TAGS(addr); + } else { + si_tag = 0; + si_atag = 0; + } if (signum == SIGSEGV) { #ifdef DEBUG - ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n", - ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); + ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx, si_tag=%x, si_atag=%x\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code, si_tag, si_atag); #endif if (si->si_code == SEGV_MTEAERR) { if (cur_mte_cxt.trig_si_code == si->si_code) @@ -45,13 +66,18 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) } /* Compare the context for precise error */ else if (si->si_code == SEGV_MTESERR) { + if ((!mtefar_support && si_atag) || (si_atag != MT_FETCH_ATAG(cur_mte_cxt.trig_addr))) { + ksft_print_msg("Invalid MTE synchronous exception caught for address tag! 
si_tag=%x, si_atag: %x\n", si_tag, si_atag); + exit(KSFT_FAIL); + } + if (cur_mte_cxt.trig_si_code == si->si_code && ((cur_mte_cxt.trig_range >= 0 && - addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || (cur_mte_cxt.trig_range < 0 && - addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { + addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { cur_mte_cxt.fault_valid = true; /* Adjust the pc by 4 */ ((ucontext_t *)uc)->uc_mcontext.pc += 4; @@ -67,11 +93,11 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); if ((cur_mte_cxt.trig_range >= 0 && - addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || (cur_mte_cxt.trig_range < 0 && - addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { + addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { cur_mte_cxt.fault_valid = true; /* Adjust the pc by 4 */ ((ucontext_t *)uc)->uc_mcontext.pc += 4; @@ -79,12 +105,17 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) } } -void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)) +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *), + bool export_tags) { struct sigaction sa; sa.sa_sigaction = handler; 
sa.sa_flags = SA_SIGINFO; + + if (export_tags && signal == SIGSEGV) + sa.sa_flags |= SA_EXPOSE_TAGBITS; + sigemptyset(&sa.sa_mask); sigaction(signal, &sa, NULL); } @@ -120,6 +151,19 @@ void mte_clear_tags(void *ptr, size_t size) mte_clear_tag_address_range(ptr, size); } +void *mte_insert_atag(void *ptr) +{ + unsigned char atag; + + atag = mtefar_support ? (random() % MT_ATAG_MASK) + 1 : 0; + return (void *)MT_SET_ATAG((unsigned long)ptr, atag); +} + +void *mte_clear_atag(void *ptr) +{ + return (void *)MT_CLEAR_ATAG((unsigned long)ptr); +} + static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, size_t range_before, size_t range_after, bool tags, int fd) @@ -272,7 +316,7 @@ void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range) cur_mte_cxt.trig_si_code = 0; } -int mte_switch_mode(int mte_option, unsigned long incl_mask) +int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly) { unsigned long en = 0; @@ -304,6 +348,9 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) break; } + if (mtestonly_support && stonly) + en |= PR_MTE_STORE_ONLY; + en |= (incl_mask << PR_MTE_TAG_SHIFT); /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. 
*/ if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) { @@ -316,12 +363,21 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) int mte_default_setup(void) { unsigned long hwcaps2 = getauxval(AT_HWCAP2); + unsigned long hwcaps3 = getauxval(AT_HWCAP3); unsigned long en = 0; int ret; + /* To generate random address tag */ + srandom(time(NULL)); + if (!(hwcaps2 & HWCAP2_MTE)) ksft_exit_skip("MTE features unavailable\n"); + mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR); + + if (hwcaps3 & HWCAP3_MTE_STORE_ONLY) + mtestonly_support = true; + /* Get current mte mode */ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); if (ret < 0) { @@ -335,6 +391,8 @@ int mte_default_setup(void) else if (ret & PR_MTE_TCF_NONE) mte_cur_mode = MTE_NONE_ERR; + mte_cur_stonly = (ret & PR_MTE_STORE_ONLY) ? true : false; + mte_cur_pstate_tco = mte_get_pstate_tco(); /* Disable PSTATE.TCO */ mte_disable_pstate_tco(); @@ -343,7 +401,7 @@ int mte_default_setup(void) void mte_restore_setup(void) { - mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG, mte_cur_stonly); if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN) mte_enable_pstate_tco(); else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS) diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index a0017a303beb..250d671329a5 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -37,10 +37,13 @@ struct mte_fault_cxt { }; extern struct mte_fault_cxt cur_mte_cxt; +extern bool mtefar_support; +extern bool mtestonly_support; /* MTE utility functions */ void mte_default_handler(int signum, siginfo_t *si, void *uc); -void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)); +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *), + bool export_tags); void mte_wait_after_trig(void); void 
*mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags); void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, @@ -54,9 +57,11 @@ void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, size_t range_before, size_t range_after); void *mte_insert_tags(void *ptr, size_t size); void mte_clear_tags(void *ptr, size_t size); +void *mte_insert_atag(void *ptr); +void *mte_clear_atag(void *ptr); int mte_default_setup(void); void mte_restore_setup(void); -int mte_switch_mode(int mte_option, unsigned long incl_mask); +int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly); void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range); /* Common utility functions */ diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h index 9b188254b61a..6ad22f07c9b8 100644 --- a/tools/testing/selftests/arm64/mte/mte_def.h +++ b/tools/testing/selftests/arm64/mte/mte_def.h @@ -42,6 +42,8 @@ #define MT_TAG_COUNT 16 #define MT_INCLUDE_TAG_MASK 0xFFFF #define MT_EXCLUDE_TAG_MASK 0x0 +#define MT_ATAG_SHIFT 60 +#define MT_ATAG_MASK 0xFUL #define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1) #define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT)) @@ -49,6 +51,12 @@ #define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK)) #define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE)) +#define MT_CLEAR_ATAG(x) ((x) & ~(MT_TAG_MASK << MT_ATAG_SHIFT)) +#define MT_SET_ATAG(x, y) ((x) | (((y) & MT_ATAG_MASK) << MT_ATAG_SHIFT)) +#define MT_FETCH_ATAG(x) ((x >> MT_ATAG_SHIFT) & (MT_ATAG_MASK)) + +#define MT_CLEAR_TAGS(x) (MT_CLEAR_ATAG(MT_CLEAR_TAG(x))) + #define MT_PSTATE_TCO_SHIFT 25 #define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT) #define MT_PSTATE_TCO_EN 1 diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST index 1789a61d0a9b..f748f2c33b22 100644 --- a/tools/testing/selftests/bpf/DENYLIST +++ 
b/tools/testing/selftests/bpf/DENYLIST @@ -1,6 +1,5 @@ # TEMPORARY # Alphabetical order -dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge stacktrace_build_id stacktrace_build_id_nmi diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 deleted file mode 100644 index 12e99c0277a8..000000000000 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ /dev/null @@ -1 +0,0 @@ -tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 910d8d6402ef..4863106034df 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -109,6 +109,7 @@ TEST_PROGS := test_kmod.sh \ test_xdping.sh \ test_bpftool_build.sh \ test_bpftool.sh \ + test_bpftool_map.sh \ test_bpftool_metadata.sh \ test_doc_build.sh \ test_xsk.sh \ @@ -840,6 +841,11 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +# This works around GCC warning about snprintf truncating strings like: +# +# char a[PATH_MAX], b[PATH_MAX]; +# snprintf(a, "%s/foo", b); // triggers -Wformat-truncation +$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation $(OUTPUT)/veristat.o: $(BPFOBJ) $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(call msg,BINARY,,$@) diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index 68a51dcc0669..16f8ce832004 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -46,8 +46,11 @@ void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt, int 
node_id, __u64 flags) __ksym __weak; +int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak; void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak; +#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start) + #else /* when compiled as user space code */ #define __arena diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h index a9674e544322..c550e5711967 100644 --- a/tools/testing/selftests/bpf/bpf_atomic.h +++ b/tools/testing/selftests/bpf/bpf_atomic.h @@ -61,7 +61,7 @@ extern bool CONFIG_X86_64 __kconfig __weak; #define smp_mb() \ ({ \ - unsigned long __val; \ + volatile unsigned long __val; \ __sync_fetch_and_add(&__val, 0); \ }) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 8215c9b3115e..9386dfe8b884 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -69,7 +69,7 @@ extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e4535451322e..15f626014872 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -4,6 +4,7 @@ #include <sys/mount.h> #include <sys/stat.h> #include <sys/types.h> +#include <sys/xattr.h> #include <linux/limits.h> #include <stdio.h> #include <stdlib.h> @@ -319,6 
+320,26 @@ int join_parent_cgroup(const char *relative_path) } /** + * set_cgroup_xattr() - Set xattr on a cgroup dir + * @relative_path: The cgroup path, relative to the workdir, to set xattr + * @name: xattr name + * @value: xattr value + * + * This function set xattr on cgroup dir. + * + * On success, it returns 0, otherwise on failure it returns -1. + */ +int set_cgroup_xattr(const char *relative_path, + const char *name, + const char *value) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + return setxattr(cgroup_path, name, value, strlen(value) + 1, 0); +} + +/** * __cleanup_cgroup_environment() - Delete temporary cgroups * * This is a helper for cleanup_cgroup_environment() that is responsible for diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 502845160d88..182e1ac36c95 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -26,6 +26,10 @@ int join_cgroup(const char *relative_path); int join_root_cgroup(void); int join_parent_cgroup(const char *relative_path); +int set_cgroup_xattr(const char *relative_path, + const char *name, + const char *value); + int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f74e1ea0ad3b..8916ab814a3e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -97,6 +97,9 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NF_TABLES_IPV4=y CONFIG_NF_TABLES_IPV6=y CONFIG_NETFILTER_INGRESS=y +CONFIG_IP_NF_IPTABLES_LEGACY=y +CONFIG_IP6_NF_IPTABLES_LEGACY=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NF_FLOW_TABLE=y CONFIG_NF_FLOW_TABLE_INET=y CONFIG_NETFILTER_NETLINK=y @@ -105,6 +108,7 @@ CONFIG_IP_NF_IPTABLES=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y +CONFIG_PACKET=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y 
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el new file mode 100644 index 000000000000..9acf389dc4ce --- /dev/null +++ b/tools/testing/selftests/bpf/config.ppc64el @@ -0,0 +1,93 @@ +CONFIG_ALTIVEC=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_CPUSETS=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_FS=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FRAME_POINTER=y +CONFIG_FRAME_WARN=1280 +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HVC_CONSOLE=y +CONFIG_INET=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_JUMP_LABEL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KPROBES=y +CONFIG_MEMCG=y +CONFIG_NAMESPACES=y +CONFIG_NET_ACT_BPF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_NET=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NONPORTABLE=y +CONFIG_NR_CPUS=256 +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PPC64=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_PSERIES=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_RT_GROUP_SCHED=y +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SERIAL_8250_CONSOLE=y 
+CONFIG_SERIAL_8250=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SMP=y +CONFIG_SOC_VIRT=y +CONFIG_SYSVIPC=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_THREAD_SHIFT=14 +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS_LOOPBACK=y +CONFIG_VSX=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index 67557cda2208..42b49870e520 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -13,7 +13,7 @@ static void test_fail_cases(void) { LIBBPF_OPTS(bpf_map_create_opts, opts); - __u32 value; + __u32 value = 0; int fd, err; /* Invalid key size */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 6befa870434b..4a0670c056ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -489,10 +489,28 @@ cleanup: bpf_link__destroy(link); } +static int verify_tracing_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type")) + return -1; + + ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie"); + + return 0; +} + static void tracing_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd; + int prog_fd, err; int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1; LIBBPF_OPTS(bpf_test_run_opts, opts); LIBBPF_OPTS(bpf_link_create_opts, link_opts); @@ -507,6 +525,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel) if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create")) goto cleanup; + err = verify_tracing_link_info(fentry_fd, 
cookie); + if (!ASSERT_OK(err, "verify_tracing_link_info")) + goto cleanup; + cookie = 0x20000000000000L; prog_fd = bpf_program__fd(skel->progs.fexit_test1); link_opts.tracing.cookie = cookie; @@ -635,10 +657,29 @@ cleanup: bpf_link__destroy(link); } +static int verify_raw_tp_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + memset(&info, 0, sizeof(info)); + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type")) + return -1; + + ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie"); + + return 0; +} + static void raw_tp_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd, link_fd = -1; + int err, prog_fd, link_fd = -1; struct bpf_link *link = NULL; LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts); LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts); @@ -656,6 +697,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel) goto cleanup; usleep(1); /* trigger */ + + err = verify_raw_tp_link_info(link_fd, cookie); + if (!ASSERT_OK(err, "verify_raw_tp_link_info")) + goto cleanup; + close(link_fd); /* detach */ link_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index add4a18c33bd..5225d69bf79b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -323,7 +323,7 @@ static void test_task_pidfd(void) static void test_task_sleepable(void) { struct bpf_iter_tasks *skel; - int pid, status, err, data_pipe[2], finish_pipe[2], c; + int pid, status, err, data_pipe[2], finish_pipe[2], c = 0; char *test_data = NULL; char *test_data_long = NULL; char *data[2]; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index fe2c502e5089..ecc3d47919ad 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -78,7 +78,7 @@ static int test_setup_uffd(void *fault_addr) } uffd_register.range.start = (unsigned long)fault_addr; - uffd_register.range.len = 4096; + uffd_register.range.len = getpagesize(); uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { close(uffd); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index c0a776feec23..82903585c870 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -879,6 +879,122 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, "static int bpf_cgrp_storage_busy = (int)2", 2); } +struct btf_dump_string_ctx { + struct btf *btf; + struct btf_dump *d; + char *str; + struct btf_dump_type_data_opts *opts; + int array_id; +}; + +static int btf_dump_one_string(struct btf_dump_string_ctx *ctx, + char *ptr, size_t ptr_sz, + const char *expected_val) +{ + size_t type_sz; + int ret; + + ctx->str[0] = '\0'; + type_sz = btf__resolve_size(ctx->btf, ctx->array_id); + ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts); + if (type_sz <= ptr_sz) { + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) + return -EINVAL; + } + if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match")) + return -EFAULT; + return 0; +} + +static void btf_dump_strings(struct btf_dump_string_ctx *ctx) +{ + struct btf_dump_type_data_opts *opts = ctx->opts; + + opts->emit_strings = true; + + opts->compact = true; + opts->emit_zeroes = false; + + opts->skip_names = false; + btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\""); + + opts->skip_names = true; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. 
*/ + opts->emit_zeroes = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->compact = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* Non-printable characters come out as hex. */ + btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\""); + btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\""); + + /* + * Strings that are too long for the specified type ("char[4]") + * should fall back to the current behavior. + */ + opts->compact = true; + btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]"); + + /* + * Strings that are too short for the specified type ("char[4]") + * should work normally. + */ + btf_dump_one_string(ctx, "ab", 3, "\"ab\""); + + /* Non-NUL-terminated arrays don't get printed as strings. */ + char food[4] = { 'f', 'o', 'o', 'd' }; + char bye[3] = { 'b', 'y', 'e' }; + + btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]"); + btf_dump_one_string(ctx, bye, 3, "['b','y','e',]"); + + /* The embedded NUL should terminate the string. */ + char embed[4] = { 'f', 'o', '\0', 'd' }; + + btf_dump_one_string(ctx, embed, 4, "\"fo\""); +} + +static void test_btf_dump_string_data(void) +{ + struct test_ctx t = {}; + char str[STRSIZE]; + struct btf_dump *d; + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + struct btf_dump_string_ctx ctx; + int char_id, int_id, array_id; + + if (test_ctx__init(&t)) + return; + + d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + /* Generate BTF for a four-element char array. 
*/ + char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR); + ASSERT_EQ(char_id, 1, "char_id"); + int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(int_id, 2, "int_id"); + array_id = btf__add_array(t.btf, int_id, char_id, 4); + ASSERT_EQ(array_id, 3, "array_id"); + + ctx.btf = t.btf; + ctx.d = d; + ctx.str = str; + ctx.opts = &opts; + ctx.array_id = array_id; + + btf_dump_strings(&ctx); + + btf_dump__free(d); + test_ctx__free(&t); +} + static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, const char *name, const char *expected_val, void *data, size_t data_sz) @@ -970,6 +1086,8 @@ void test_btf_dump() { test_btf_dump_struct_data(btf, d, str); if (test__start_subtest("btf_dump: var_data")) test_btf_dump_var_data(btf, d, str); + if (test__start_subtest("btf_dump: string_data")) + test_btf_dump_string_data(); btf_dump__free(d); btf__free(btf); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c new file mode 100644 index 000000000000..bb60704a3ef9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_mprog.skel.h" + +static void assert_mprog_count(int cg, int atype, int expected) +{ + __u32 count = 0, attach_flags = 0; + int err; + + err = bpf_prog_query(cg, atype, 0, &attach_flags, + NULL, &count); + ASSERT_EQ(count, expected, "count"); + ASSERT_EQ(err, 0, "prog_query"); +} + +static void test_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + err = 
bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + optd.expected_revision = 5; + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + LIBBPF_OPTS_RESET(optd); + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static 
void test_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + LIBBPF_OPTS(bpf_cgroup_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct bpf_link *link1, *link2, *link3, *link4; + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_LINK, + .relative_id = id_from_link_fd(bpf_link__fd(link1)), + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = 
bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "link_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + __u32 fd1, fd2, fd3, fd4; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/preorder_prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = 
bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, -EINVAL, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + 
assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + struct bpf_link *link1, *link2, *link3, *link4; + struct cgroup_mprog *skel; + __u32 fd2; + int cg; + + cg = test__join_cgroup("/preorder_link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_ERR_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, 
+ .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_invalid_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + __u32 fd1, fd2, id2; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/invalid_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + id2 = id_from_prog_fd(fd2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = 
bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER, + .replace_prog_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +void test_cgroup_mprog_opts(void) +{ + if (test__start_subtest("prog_attach_detach")) + test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("link_attach_detach")) + 
test_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_prog_attach_detach")) + test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_link_attach_detach")) + test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("invalid_attach_detach")) + test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c new file mode 100644 index 000000000000..a36d2e968bc5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_preorder.skel.h" + +static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opts); + enum bpf_attach_type prog_p_atype, prog_p2_atype; + int prog_p_fd, prog_p2_fd; + struct cgroup_preorder *skel = NULL; + struct bpf_program *prog; + __u8 *result, buf; + socklen_t optlen = 1; + int err = 0; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return 0; + + LIBBPF_OPTS_RESET(opts); + opts.flags = BPF_F_ALLOW_MULTI; + prog = skel->progs.parent; + prog_p_fd = bpf_program__fd(prog); + prog_p_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE; + if (has_relative_fd) + opts.relative_fd = prog_p_fd; + prog = skel->progs.parent_2; + prog_p2_fd = bpf_program__fd(prog); + prog_p2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); + if (!ASSERT_OK(err, 
"bpf_prog_attach_opts-parent_2")) + goto detach_parent; + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_parent_2; + + result = skel->bss->result; + ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values"); + +detach_parent_2: + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), + "bpf_prog_detach2-parent_2"); +detach_parent: + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), + "bpf_prog_detach2-parent"); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + +void test_cgroup_mprog_ordering(void) +{ + int cg_parent = -1, sock_fd = -1; + + cg_parent = test__join_cgroup("/parent"); + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(sock_fd, 0, "socket")) + goto out; + + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1"); + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2"); + +out: + close(sock_fd); + close(cg_parent); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c index 87978a0f7eb7..e0dd966e4a3e 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -7,133 +7,60 @@ #include <string.h> #include <unistd.h> #include <sys/socket.h> -#include <sys/xattr.h> - #include <test_progs.h> +#include "cgroup_helpers.h" #include "read_cgroupfs_xattr.skel.h" #include "cgroup_read_xattr.skel.h" -#define CGROUP_FS_ROOT "/sys/fs/cgroup/" -#define CGROUP_FS_PARENT CGROUP_FS_ROOT "foo/" +#define CGROUP_FS_PARENT "foo/" #define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/" - -static int move_pid_to_cgroup(const char *cgroup_folder, pid_t pid) -{ - char filename[128]; - char pid_str[64]; - int procs_fd; - int ret; - - snprintf(filename, sizeof(filename), "%scgroup.procs", cgroup_folder); - 
snprintf(pid_str, sizeof(pid_str), "%d", pid); - - procs_fd = open(filename, O_WRONLY | O_APPEND); - if (!ASSERT_OK_FD(procs_fd, "open")) - return -1; - - ret = write(procs_fd, pid_str, strlen(pid_str)); - close(procs_fd); - if (!ASSERT_GT(ret, 0, "write cgroup.procs")) - return -1; - return 0; -} - -static void reset_cgroups_and_lo(void) -{ - rmdir(CGROUP_FS_CHILD); - rmdir(CGROUP_FS_PARENT); - system("ip addr del 1.1.1.1/32 dev lo"); - system("ip link set dev lo down"); -} +#define TMP_FILE "/tmp/selftests_cgroup_xattr" static const char xattr_value_a[] = "bpf_selftest_value_a"; static const char xattr_value_b[] = "bpf_selftest_value_b"; static const char xattr_name[] = "user.bpf_test"; -static int setup_cgroups_and_lo(void) -{ - int err; - - err = mkdir(CGROUP_FS_PARENT, 0755); - if (!ASSERT_OK(err, "mkdir 1")) - goto error; - err = mkdir(CGROUP_FS_CHILD, 0755); - if (!ASSERT_OK(err, "mkdir 2")) - goto error; - - err = setxattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a, - strlen(xattr_value_a) + 1, 0); - if (!ASSERT_OK(err, "setxattr 1")) - goto error; - - err = setxattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b, - strlen(xattr_value_b) + 1, 0); - if (!ASSERT_OK(err, "setxattr 2")) - goto error; - - err = system("ip link set dev lo up"); - if (!ASSERT_OK(err, "lo up")) - goto error; - - err = system("ip addr add 1.1.1.1 dev lo"); - if (!ASSERT_OK(err, "lo addr v4")) - goto error; - - err = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); - if (!ASSERT_OK(err, "write_sysctl")) - goto error; - - return 0; -error: - reset_cgroups_and_lo(); - return err; -} - static void test_read_cgroup_xattr(void) { - struct sockaddr_in sa4 = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; + int tmp_fd, parent_cgroup_fd = -1, child_cgroup_fd = -1; struct read_cgroupfs_xattr *skel = NULL; - pid_t pid = gettid(); - int sock_fd = -1; - int connect_fd = -1; - if (!ASSERT_OK(setup_cgroups_and_lo(), "setup_cgroups_and_lo")) + parent_cgroup_fd = 
test__join_cgroup(CGROUP_FS_PARENT); + if (!ASSERT_OK_FD(parent_cgroup_fd, "create parent cgroup")) return; - if (!ASSERT_OK(move_pid_to_cgroup(CGROUP_FS_CHILD, pid), - "move_pid_to_cgroup")) + if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a), + "set parent xattr")) + goto out; + + child_cgroup_fd = test__join_cgroup(CGROUP_FS_CHILD); + if (!ASSERT_OK_FD(child_cgroup_fd, "create child cgroup")) + goto out; + if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b), + "set child xattr")) goto out; skel = read_cgroupfs_xattr__open_and_load(); if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) goto out; - skel->bss->target_pid = pid; + skel->bss->target_pid = gettid(); if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) goto out; - sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); - if (!ASSERT_OK_FD(sock_fd, "sock create")) - goto out; - - connect_fd = connect(sock_fd, &sa4, sizeof(sa4)); - if (!ASSERT_OK_FD(connect_fd, "connect 1")) - goto out; - close(connect_fd); + tmp_fd = open(TMP_FILE, O_RDONLY | O_CREAT); + ASSERT_OK_FD(tmp_fd, "open tmp file"); + close(tmp_fd); ASSERT_TRUE(skel->bss->found_value_a, "found_value_a"); ASSERT_TRUE(skel->bss->found_value_b, "found_value_b"); out: - close(connect_fd); - close(sock_fd); + close(child_cgroup_fd); + close(parent_cgroup_fd); read_cgroupfs_xattr__destroy(skel); - move_pid_to_cgroup(CGROUP_FS_ROOT, pid); - reset_cgroups_and_lo(); + unlink(TMP_FILE); } void test_cgroup_xattr(void) diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 62e7ec775f24..9b2d9ceda210 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -21,6 +21,14 @@ static struct { {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, {"test_dynptr_copy", SETUP_SYSCALL_SLEEP}, {"test_dynptr_copy_xdp", SETUP_XDP_PROG}, + {"test_dynptr_memset_zero", 
SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_notzero", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_zero_offset", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_zero_adjusted", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_overflow", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_overflow_offset", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_readonly", SETUP_SKB_PROG}, + {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG}, {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, @@ -43,6 +51,8 @@ static struct { {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP}, }; +#define PAGE_SIZE_64K 65536 + static void verify_success(const char *prog_name, enum test_setup_type setup_type) { char user_data[384] = {[0 ... 382] = 'a', '\0'}; @@ -138,14 +148,18 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ } case SETUP_XDP_PROG: { - char data[5000]; + char data[90000]; int err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = sizeof(data), .repeat = 1, ); + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = sizeof(data); + else + opts.data_size_in = 5000; + prog_fd = bpf_program__fd(prog); err = bpf_prog_test_run_opts(prog_fd, &opts); diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index 9add890c2d37..241b2c8c6e0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -312,7 +312,7 @@ static void check_fd_array_cnt__referenced_btfs(void) /* btf should still exist when original file descriptor is closed */ err = get_btf_id_by_fd(extra_fds[0], &btf_id); - if (!ASSERT_GE(err, 0, "get_btf_id_by_fd")) + if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd")) goto cleanup; Close(extra_fds[0]); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c 
deleted file mode 100644 index 568d3aa48a78..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <test_progs.h> -#include "fexit_noreturns.skel.h" - -void test_fexit_noreturns(void) -{ - RUN_TESTS(fexit_noreturns); -} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 5266c7022863..14c5a7ef0e87 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -72,7 +72,7 @@ static struct { { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, { "obj_new_acq", "Unreleased reference id=" }, { "use_after_drop", "invalid mem access 'scalar'" }, - { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" }, + { "ptr_walk_scalar", "type=rdonly_untrusted_mem expected=percpu_ptr_" }, { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" }, { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" }, { "direct_read_head", "direct access to bpf_list_head is disallowed" }, diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index 169ce689b97c..d6f14a232002 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -7,6 +7,10 @@ #include "test_log_buf.skel.h" #include "bpf_util.h" +#if !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + static size_t libbpf_log_pos; static char libbpf_log_buf[1024 * 1024]; static bool libbpf_log_error; diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..40d4f687bd9c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: 
GPL-2.0-only + +#include <test_progs.h> +#include "mem_rdonly_untrusted.skel.h" + +void test_mem_rdonly_untrusted(void) +{ + RUN_TESTS(mem_rdonly_untrusted); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 39d42271cc46..e261b0e872db 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -465,6 +465,20 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, return range_improve(x_t, x, x_swap); } + if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) { + if (x_t == S64 && x.a > x.b) { + if (x.b < y.a && x.a <= y.b) + return range(x_t, x.a, y.b); + if (x.a > y.b && x.b >= y.a) + return range(x_t, y.a, x.b); + } else if (x_t == U64 && y.a > y.b) { + if (y.b < x.a && y.a <= x.b) + return range(x_t, y.a, x.b); + if (y.a > x.b && y.b >= x.a) + return range(x_t, x.a, y.b); + } + } + /* otherwise, plain range cast and intersection works */ return range_improve(x_t, x, y_cast); } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index da430df45aa4..d1e4cb28a72c 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -97,7 +97,7 @@ static void ringbuf_write_subtest(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->maps.ringbuf.max_entries = 0x4000; + skel->maps.ringbuf.max_entries = 0x40000; err = test_ringbuf_write_lskel__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -108,7 +108,7 @@ static void ringbuf_write_subtest(void) mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) goto cleanup; - *mmap_ptr = 0x3000; + *mmap_ptr = 0x30000; ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c 
b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index a4517bee34d5..27781df8f2fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2024 Meta +#include <poll.h> #include <test_progs.h> #include "network_helpers.h" #include "sock_iter_batch.skel.h" #define TEST_NS "sock_iter_batch_netns" +#define TEST_CHILD_NS "sock_iter_batch_child_netns" static const int init_batch_size = 16; static const int nr_soreuse = 4; @@ -118,6 +120,45 @@ done: return nth_sock_idx; } +static void destroy(int fd) +{ + struct sock_iter_batch *skel = NULL; + __u64 cookie = socket_cookie(fd); + struct bpf_link *link = NULL; + int iter_fd = -1; + int nread; + __u64 out; + + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + goto done; + + skel->rodata->destroy_cookie = cookie; + + if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + goto done; + + /* Delete matching socket. 
*/ + nread = read(iter_fd, &out, sizeof(out)); + ASSERT_GE(nread, 0, "nread"); + if (nread) + ASSERT_EQ(out, cookie, "cookie matches"); +done: + if (iter_fd >= 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); + close(fd); +} + static int get_seen_count(int fd, struct sock_count counts[], int n) { __u64 cookie = socket_cookie(fd); @@ -152,8 +193,71 @@ static void check_n_were_seen_once(int *fds, int fds_len, int n, ASSERT_EQ(seen_once, n, "seen_once"); } +static int accept_from_one(struct pollfd *server_poll_fds, + int server_poll_fds_len) +{ + static const int poll_timeout_ms = 5000; /* 5s */ + int ret; + int i; + + ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms); + if (!ASSERT_EQ(ret, 1, "poll")) + return -1; + + for (i = 0; i < server_poll_fds_len; i++) + if (server_poll_fds[i].revents & POLLIN) + return accept(server_poll_fds[i].fd, NULL, NULL); + + return -1; +} + +static int *connect_to_server(int family, int sock_type, const char *addr, + __u16 port, int nr_connects, int *server_fds, + int server_fds_len) +{ + struct pollfd *server_poll_fds = NULL; + int *established_socks = NULL; + int i; + + server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds)); + if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds")) + return NULL; + + for (i = 0; i < server_fds_len; i++) { + server_poll_fds[i].fd = server_fds[i]; + server_poll_fds[i].events = POLLIN; + } + + i = 0; + + established_socks = malloc(sizeof(*established_socks) * nr_connects*2); + if (!ASSERT_OK_PTR(established_socks, "established_socks")) + goto error; + + while (nr_connects--) { + established_socks[i] = connect_to_addr_str(family, sock_type, + addr, port, NULL); + if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str")) + goto error; + i++; + established_socks[i] = accept_from_one(server_poll_fds, + server_fds_len); + if (!ASSERT_OK_FD(established_socks[i], "accept_from_one")) + goto error; + i++; + } + + free(server_poll_fds); + return 
established_socks; +error: + free_fds(established_socks, i); + free(server_poll_fds); + return NULL; +} + static void remove_seen(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { int close_idx; @@ -182,8 +286,46 @@ static void remove_seen(int family, int sock_type, const char *addr, __u16 port, counts_len); } +static void remove_seen_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Leave one established socket. */ + read_n(iter_fd, established_socks_len - 1, counts, counts_len); + + /* Close a socket we've already seen to remove it from the bucket. */ + close_idx = get_nth_socket(established_socks, established_socks_len, + link, listen_socks_len + 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + destroy(established_socks[close_idx]); + established_socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the last socket wasn't skipped and that there were no + * repeats. 
+ */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); +} + static void remove_unseen(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -214,8 +356,54 @@ static void remove_unseen(int family, int sock_type, const char *addr, counts_len); } +static void remove_unseen_established(int family, int sock_type, + const char *addr, __u16 port, + int *listen_socks, int listen_socks_len, + int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw one established socks. */ + check_n_were_seen_once(established_socks, established_socks_len, 1, + counts, counts_len); + + /* Close what would be the next socket in the bucket to exercise the + * condition where we need to skip past the first cookie we remembered. + */ + close_idx = get_nth_socket(established_socks, established_socks_len, + link, listen_socks_len + 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + + destroy(established_socks[close_idx]); + established_socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the remaining sockets were seen exactly once and that we + * didn't repeat the socket that was already seen. 
+ */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); +} + static void remove_all(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -242,8 +430,57 @@ static void remove_all(int family, int sock_type, const char *addr, ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); } +static void remove_all_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int *close_idx = NULL; + int i; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw one established socks. */ + check_n_were_seen_once(established_socks, established_socks_len, 1, + counts, counts_len); + + /* Close all remaining sockets to exhaust the list of saved cookies and + * exit without putting any sockets into the batch on the next read. 
+ */ + close_idx = malloc(sizeof(int) * (established_socks_len - 1)); + if (!ASSERT_OK_PTR(close_idx, "close_idx malloc")) + return; + for (i = 0; i < established_socks_len - 1; i++) { + close_idx[i] = get_nth_socket(established_socks, + established_socks_len, link, + listen_socks_len + i); + if (!ASSERT_GE(close_idx[i], 0, "close_idx")) + return; + } + + for (i = 0; i < established_socks_len - 1; i++) { + destroy(established_socks[close_idx[i]]); + established_socks[close_idx[i]] = -1; + } + + /* Make sure there are no more sockets returned */ + ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); + free(close_idx); +} + static void add_some(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { int *new_socks = NULL; @@ -271,8 +508,52 @@ done: free_fds(new_socks, socks_len); } +static void add_some_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, + int counts_len, struct bpf_link *link, + int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established_socks_len - 1 sockets. */ + read_n(iter_fd, established_socks_len - 1, counts, counts_len); + + /* Make sure we saw established_socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); + + /* Double the number of established sockets in the bucket. 
*/ + new_socks = connect_to_server(family, sock_type, addr, port, + established_socks_len / 2, listen_socks, + listen_socks_len); + if (!ASSERT_OK_PTR(new_socks, "connect_to_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each of the original sockets was seen exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len, counts, counts_len); +done: + free_fds(new_socks, established_socks_len); +} + static void force_realloc(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -299,11 +580,32 @@ done: free_fds(new_socks, socks_len); } +static void force_realloc_established(int family, int sock_type, + const char *addr, __u16 port, + int *listen_socks, int listen_socks_len, + int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + /* Iterate through all sockets to trigger a realloc. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each socket was seen exactly once. 
*/ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len, counts, counts_len); +} + struct test_case { void (*test)(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd); const char *description; + int ehash_buckets; + int connections; int init_socks; int max_socks; int sock_type; @@ -358,18 +660,140 @@ static struct test_case resume_tests[] = { .family = AF_INET6, .test = force_realloc, }, + { + .description = "tcp: resume after removing a seen socket (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_seen, + }, + { + .description = "tcp: resume after removing one unseen socket (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_unseen, + }, + { + .description = "tcp: resume after removing all unseen sockets (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_all, + }, + { + .description = "tcp: resume after adding a few sockets (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + /* Use AF_INET so that new sockets are added to the head of the + * bucket's list. + */ + .family = AF_INET, + .test = add_some, + }, + { + .description = "tcp: force a realloc to occur (listening)", + .init_socks = init_batch_size, + .max_socks = init_batch_size * 2, + .sock_type = SOCK_STREAM, + /* Use AF_INET6 so that new sockets are added to the tail of the + * bucket's list, needing to be added to the next batch to force + * a realloc. 
+ */ + .family = AF_INET6, + .test = force_realloc, + }, + { + .description = "tcp: resume after removing a seen socket (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_seen_established, + }, + { + .description = "tcp: resume after removing one unseen socket (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_unseen_established, + }, + { + .description = "tcp: resume after removing all unseen sockets (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_all_established, + }, + { + .description = "tcp: resume after adding a few sockets (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = add_some_established, + }, + { + .description = "tcp: force a realloc to occur (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + /* Bucket size will need to double when going from listening to + * established sockets. 
+ */ + .connections = init_batch_size, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse + (init_batch_size * 2), + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = force_realloc_established, + }, }; static void do_resume_test(struct test_case *tc) { struct sock_iter_batch *skel = NULL; + struct sock_count *counts = NULL; static const __u16 port = 10001; + struct nstoken *nstoken = NULL; struct bpf_link *link = NULL; - struct sock_count *counts; + int *established_fds = NULL; int err, iter_fd = -1; const char *addr; int *fds = NULL; - int local_port; + + if (tc->ehash_buckets) { + SYS_NOFAIL("ip netns del " TEST_CHILD_NS); + SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d", + tc->ehash_buckets); + SYS(done, "ip netns add %s", TEST_CHILD_NS); + SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS); + nstoken = open_netns(TEST_CHILD_NS); + if (!ASSERT_OK_PTR(nstoken, "open_child_netns")) + goto done; + } counts = calloc(tc->max_socks, sizeof(*counts)); if (!ASSERT_OK_PTR(counts, "counts")) @@ -384,11 +808,18 @@ static void do_resume_test(struct test_case *tc) tc->init_socks); if (!ASSERT_OK_PTR(fds, "start_reuseport_server")) goto done; - local_port = get_socket_local_port(*fds); - if (!ASSERT_GE(local_port, 0, "get_socket_local_port")) - goto done; - skel->rodata->ports[0] = ntohs(local_port); + if (tc->connections) { + established_fds = connect_to_server(tc->family, tc->sock_type, + addr, port, + tc->connections, fds, + tc->init_socks); + if (!ASSERT_OK_PTR(established_fds, "connect_to_server")) + goto done; + } + skel->rodata->ports[0] = 0; + skel->rodata->ports[1] = 0; skel->rodata->sf = tc->family; + skel->rodata->ss = 0; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) @@ -406,10 +837,15 @@ static void do_resume_test(struct test_case *tc) goto done; tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks, - counts, tc->max_socks, link, 
iter_fd); + established_fds, tc->connections*2, counts, tc->max_socks, + link, iter_fd); done: + close_netns(nstoken); + SYS_NOFAIL("ip netns del " TEST_CHILD_NS); + SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0"); free(counts); free_fds(fds, tc->init_socks); + free_fds(established_fds, tc->connections*2); if (iter_fd >= 0) close(iter_fd); bpf_link__destroy(link); @@ -454,6 +890,8 @@ static void do_test(int sock_type, bool onebyone) skel->rodata->ports[i] = ntohs(local_port); } skel->rodata->sf = AF_INET6; + if (sock_type == SOCK_STREAM) + skel->rodata->ss = TCP_LISTEN; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index b6c471da5c28..b87e7f39e15a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -314,6 +314,95 @@ out: test_sockmap_ktls__destroy(skel); } +static void test_sockmap_ktls_tx_pop(int family, int sotype) +{ + char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0"; + int c = 0, p = 0, one = 1, sent, recvd; + struct test_sockmap_ktls *skel; + int prog_fd, map_fd; + char rcv[50] = {0}; + int err; + int i, m, r; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + struct { + int pop_start; + int pop_len; + } pop_policy[] = { 
+ /* trim the start */ + {0, 2}, + {0, 10}, + {1, 2}, + {1, 10}, + /* trim the end */ + {35, 2}, + /* New entries should be added before this line */ + {-1, -1}, + }; + + i = 0; + while (pop_policy[i].pop_start >= 0) { + skel->bss->pop_start = pop_policy[i].pop_start; + skel->bss->pop_end = pop_policy[i].pop_len; + + sent = send(c, msg, sizeof(msg), 0); + if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch")) + goto out; + + /* verify the data + * msg: 0123456789a bcdefghij klmnopqrstuvwxyz + * | | + * popped data + */ + for (m = 0, r = 0; m < sizeof(msg);) { + /* skip checking the data that has been popped */ + if (m >= pop_policy[i].pop_start && + m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) { + m++; + continue; + } + + if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch")) + goto out; + m++; + r++; + } + i++; + } +out: + if (c) + close(c); + if (p) + close(p); + test_sockmap_ktls__destroy(skel); +} + static void run_tests(int family, enum bpf_map_type map_type) { int map; @@ -338,6 +427,8 @@ static void run_ktls_test(int family, int sotype) test_sockmap_ktls_tx_cork(family, sotype, true); if (test__start_subtest("tls tx egress with no buf")) test_sockmap_ktls_tx_no_buf(family, sotype, true); + if (test__start_subtest("tls tx with pop")) + test_sockmap_ktls_tx_pop(family, sotype); } void test_sockmap_ktls(void) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 1d98eee7a2c3..f1bdccc7e4e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map) goto close; n = xsend(c1, buf, sizeof(buf), 0); + if (n == -1) + goto close; if (n < sizeof(buf)) FAIL("incomplete 
write"); diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c new file mode 100644 index 000000000000..d9f0185dca61 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stream.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <sys/mman.h> +#include <regex.h> + +#include "stream.skel.h" +#include "stream_fail.skel.h" + +void test_stream_failure(void) +{ + RUN_TESTS(stream_fail); +} + +void test_stream_success(void) +{ + RUN_TESTS(stream); + return; +} + +struct { + int prog_off; + const char *errstr; +} stream_error_arr[] = { + { + offsetof(struct stream, progs.stream_cond_break), + "ERROR: Timeout detected for may_goto instruction\n" + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, + { + offsetof(struct stream, progs.stream_deadlock), + "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n" + "Attempted lock = (0x[0-9a-fA-F]+)\n" + "Total held locks = 1\n" + "Held lock\\[ 0\\] = \\1\n" // Lock address must match + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, +}; + +static int match_regex(const char *pattern, const char *string) +{ + int err, rc; + regex_t re; + + err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE); + if (err) + return -1; + rc = regexec(&re, string, 0, NULL, 0); + regfree(&re); + return rc == 0 ? 
1 : 0; +} + +void test_stream_errors(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[1024]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) { + struct bpf_program **prog; + + prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off); + prog_fd = bpf_program__fd(*prog); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + +#if !defined(__x86_64__) + ASSERT_TRUE(1, "Timed may_goto unsupported, skip."); + if (i == 0) { + ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts); + ASSERT_EQ(ret, 0, "stream read"); + continue; + } +#endif + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); + ASSERT_GT(ret, 0, "stream read"); + ASSERT_LE(ret, 1023, "len for buf"); + buf[ret] = '\0'; + + ret = match_regex(stream_error_arr[i].errstr, buf); + if (!ASSERT_TRUE(ret == 1, "regex match")) + fprintf(stderr, "Output from stream:\n%s\n", buf); + } + + stream__destroy(skel); +} + +void test_stream_syscall(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[64]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.stream_syscall); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + + ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EINVAL, "bad prog_fd"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -ENOENT, "bad stream id"); + + 
ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EFAULT, "bad stream buf"); + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 2, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 1, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stdout"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stderr"); + + stream__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c new file mode 100644 index 000000000000..35af8044d059 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include <test_progs.h> +#include "string_kfuncs_success.skel.h" +#include "string_kfuncs_failure1.skel.h" +#include "string_kfuncs_failure2.skel.h" +#include <sys/mman.h> + +static const char * const test_cases[] = { + "strcmp", + "strchr", + "strchrnul", + "strnchr", + "strrchr", + "strlen", + "strnlen", + "strspn_str", + "strspn_accept", + "strcspn_str", + "strcspn_reject", + "strstr", + "strnstr", +}; + +void run_too_long_tests(void) +{ + struct string_kfuncs_failure2 *skel; + struct bpf_program *prog; + char test_name[256]; + int err, i; + + skel = string_kfuncs_failure2__open_and_load(); + if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load")) + return; + + memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str)); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + sprintf(test_name, "test_%s_too_long", test_cases[i]); + if (!test__start_subtest(test_name)) + continue; + + prog = bpf_object__find_program_by_name(skel->obj, test_name); + if (!ASSERT_OK_PTR(prog, 
"bpf_object__find_program_by_name")) + goto cleanup; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto cleanup; + + ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG"); + } + +cleanup: + string_kfuncs_failure2__destroy(skel); +} + +void test_string_kfuncs(void) +{ + RUN_TESTS(string_kfuncs_success); + RUN_TESTS(string_kfuncs_failure1); + + run_too_long_tests(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 66a900327f91..0ab36503c3b2 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1195,7 +1195,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry, bool test_fexit, bool test_fentry_entry) { - int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val; + int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val; struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL; struct bpf_link *fentry_link = NULL, *fexit_link = NULL; struct bpf_program *prog, *fentry_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 924d0e25320c..d52a62af77bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -8,34 +8,6 @@ # define loopback 1 #endif -static inline __u32 id_from_prog_fd(int fd) -{ - struct bpf_prog_info prog_info = {}; - __u32 prog_info_len = sizeof(prog_info); - int err; - - err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); - if (!ASSERT_OK(err, "id_from_prog_fd")) - return 0; - - ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); - return prog_info.id; -} - -static inline __u32 id_from_link_fd(int fd) -{ - struct bpf_link_info link_info = {}; - 
__u32 link_info_len = sizeof(link_info); - int err; - - err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); - if (!ASSERT_OK(err, "id_from_link_fd")) - return 0; - - ASSERT_NEQ(link_info.id, 0, "link_info.id"); - return link_info.id; -} - static inline __u32 ifindex_from_link_fd(int fd) { struct bpf_link_info link_info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index 47b56c258f3f..367f47e4a936 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -60,13 +60,19 @@ static void test_set_global_vars_succeeds(void) " -G \"var_s8 = -128\" "\ " -G \"var_u8 = 255\" "\ " -G \"var_ea = EA2\" "\ - " -G \"var_eb = EB2\" "\ - " -G \"var_ec = EC2\" "\ + " -G \"var_eb = EB2\" "\ + " -G \"var_ec=EC2\" "\ " -G \"var_b = 1\" "\ - " -G \"struct1.struct2.u.var_u8 = 170\" "\ + " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\ " -G \"union1.struct3.var_u8_l = 0xaa\" "\ " -G \"union1.struct3.var_u8_h = 0xaa\" "\ - "-vl2 > %s", fix->veristat, fix->tmpfile); + " -G \"arr[3]= 171\" " \ + " -G \"arr[EA2] =172\" " \ + " -G \"enum_arr[EC2]=EA3\" " \ + " -G \"three_d[31][7][EA2]=173\"" \ + " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \ + " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \ + " -vl2 > %s", fix->veristat, fix->tmpfile); read(fix->fd, fix->output, fix->sz); __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); @@ -81,8 +87,14 @@ static void test_set_global_vars_succeeds(void) __CHECK_STR("_w=12 ", "var_eb = EB2"); __CHECK_STR("_w=13 ", "var_ec = EC2"); __CHECK_STR("_w=1 ", "var_b = 1"); - __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170"); + __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa"); + __CHECK_STR("_w=171 ", "arr[3]= 171"); + __CHECK_STR("_w=172 ", "arr[EA2] =172"); + 
__CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3"); + __CHECK_STR("_w=173 ", "matrix[31][7][11]=173"); + __CHECK_STR("_w=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174"); + __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); out: teardown_fixture(fix); @@ -129,6 +141,95 @@ out: teardown_fixture(fix); } +static void test_unsupported_ptr_array_type(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr"); + +out: + teardown_fixture(fix); +} + +static void test_array_out_of_bounds(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Array index 99 is out of bounds", "arr[99]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_not_found(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_for_non_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1"); + +out: + teardown_fixture(fix); +} + +static void 
test_no_array_index_for_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't set arr. Only ints and enums are supported", "arr = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1"); + +out: + teardown_fixture(fix); +} + void test_veristat(void) { if (test__start_subtest("set_global_vars_succeeds")) @@ -139,6 +240,22 @@ void test_veristat(void) if (test__start_subtest("set_global_vars_from_file_succeeds")) test_set_global_vars_from_file_succeeds(); + + if (test__start_subtest("test_unsupported_ptr_array_type")) + test_unsupported_ptr_array_type(); + + if (test__start_subtest("test_array_out_of_bounds")) + test_array_out_of_bounds(); + + if (test__start_subtest("test_array_index_not_found")) + test_array_index_not_found(); + + if (test__start_subtest("test_array_index_for_non_array")) + test_array_index_for_non_array(); + + if (test__start_subtest("test_no_array_index_for_array")) + test_no_array_index_for_array(); + } #undef __CHECK_STR diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index f9392df23f8a..b81dde283052 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -115,7 +115,7 @@ static int create_bpffs_fd(void) static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) { - int mnt_fd, err; + int err; /* set up token delegation mount options */ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); @@ -136,12 +136,7 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) if (err < 0) return -errno; - 
/* create O_PATH fd for detached mount */ - mnt_fd = sys_fsmount(fs_fd, 0, 0); - if (err < 0) - return -errno; - - return mnt_fd; + return 0; } /* send FD over Unix domain (AF_UNIX) socket */ @@ -287,6 +282,7 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba { int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1; struct token_lsm *lsm_skel = NULL; + char one; /* load and attach LSM "policy" before we go into unpriv userns */ lsm_skel = token_lsm__open_and_load(); @@ -333,13 +329,19 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba err = sendfd(sock_fd, fs_fd); if (!ASSERT_OK(err, "send_fs_fd")) goto cleanup; - zclose(fs_fd); + + /* wait that the parent reads the fd, does the fsconfig() calls + * and send us a signal that it is done + */ + err = read(sock_fd, &one, sizeof(one)); + if (!ASSERT_GE(err, 0, "read_one")) + goto cleanup; /* avoid mucking around with mount namespaces and mounting at - * well-known path, just get detach-mounted BPF FS fd back from parent + * well-known path, just create O_PATH fd for detached mount */ - err = recvfd(sock_fd, &mnt_fd); - if (!ASSERT_OK(err, "recv_mnt_fd")) + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (!ASSERT_OK_FD(mnt_fd, "mnt_fd")) goto cleanup; /* try to fspick() BPF FS and try to add some delegation options */ @@ -429,24 +431,24 @@ again: static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) { - int fs_fd = -1, mnt_fd = -1, token_fd = -1, err; + int fs_fd = -1, token_fd = -1, err; + char one = 1; err = recvfd(sock_fd, &fs_fd); if (!ASSERT_OK(err, "recv_bpffs_fd")) goto cleanup; - mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); - if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = materialize_bpffs_fd(fs_fd, bpffs_opts); + if (!ASSERT_GE(err, 0, "materialize_bpffs_fd")) { err = -EINVAL; goto cleanup; } - zclose(fs_fd); - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, mnt_fd); - if 
(!ASSERT_OK(err, "send_mnt_fd")) + /* notify the child that we did the fsconfig() calls and it can proceed. */ + err = write(sock_fd, &one, sizeof(one)); + if (!ASSERT_EQ(err, sizeof(one), "send_one")) goto cleanup; - zclose(mnt_fd); + zclose(fs_fd); /* receive BPF token FD back from child for some extra tests */ err = recvfd(sock_fd, &token_fd); @@ -459,7 +461,6 @@ static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) cleanup: zclose(sock_fd); zclose(fs_fd); - zclose(mnt_fd); zclose(token_fd); if (child_pid > 0) @@ -1046,6 +1047,41 @@ err_out: #define bit(n) (1ULL << (n)) +static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel) +{ + int err, token_fd = -1; + struct bpf_token_info info; + u32 len = sizeof(struct bpf_token_info); + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + memset(&info, 0, len); + err = bpf_obj_get_info_by_fd(token_fd, &info, &len); + if (!ASSERT_ERR(err, "bpf_obj_get_token_info")) + goto cleanup; + if (!ASSERT_EQ(info.allowed_cmds, bit(BPF_MAP_CREATE), "token_info_cmds_map_create")) { + err = -EINVAL; + goto cleanup; + } + if (!ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_XDP), "token_info_progs_xdp")) { + err = -EINVAL; + goto cleanup; + } + + /* The BPF_PROG_TYPE_EXT is not set in token */ + if (ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_EXT), "token_info_progs_ext")) + err = -EINVAL; + +cleanup: + zclose(token_fd); + return err; +} + void test_token(void) { if (test__start_subtest("map_token")) { @@ -1149,4 +1185,13 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); } + if (test__start_subtest("bpf_token_info")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .progs = bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_bpf_token_info); + } } diff --git 
a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c index a222df765bc3..10e231965589 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -28,10 +28,62 @@ out: tracing_failure__destroy(skel); } +static void test_tracing_fail_prog(const char *prog_name, const char *exp_msg) +{ + struct tracing_failure *skel; + struct bpf_program *prog; + char log_buf[256]; + int err; + + skel = tracing_failure__open(); + if (!ASSERT_OK_PTR(skel, "tracing_failure__open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + + bpf_program__set_autoload(prog, true); + bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf)); + + err = tracing_failure__load(skel); + if (!ASSERT_ERR(err, "tracing_failure__load")) + goto out; + + ASSERT_HAS_SUBSTR(log_buf, exp_msg, "log_buf"); +out: + tracing_failure__destroy(skel); +} + +static void test_tracing_deny(void) +{ + int btf_id; + + /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */ + btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY); + if (btf_id <= 0) { + test__skip(); + return; + } + + test_tracing_fail_prog("tracing_deny", + "Attaching tracing programs to function '__rcu_read_lock' is rejected."); +} + +static void test_fexit_noreturns(void) +{ + test_tracing_fail_prog("fexit_noreturns", + "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected."); +} + void test_tracing_failure(void) { if (test__start_subtest("bpf_spin_lock")) test_bpf_spin_lock(true); if (test__start_subtest("bpf_spin_unlock")) test_bpf_spin_lock(false); + if (test__start_subtest("tracing_deny")) + test_tracing_deny(); + if (test__start_subtest("fexit_noreturns")) + test_fexit_noreturns(); } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c 
b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index c397336fe1ed..b17dc39a23db 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -251,7 +251,7 @@ static void test_uretprobe_syscall_call(void) .retprobe = true, ); struct uprobe_syscall_executed *skel; - int pid, status, err, go[2], c; + int pid, status, err, go[2], c = 0; if (!ASSERT_OK(pipe(go), "pipe")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 495d66414b57..9057e983cc54 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -270,8 +270,16 @@ static void subtest_multispec_usdt(void) */ trigger_300_usdts(); - /* we'll reuse usdt_100 BPF program for usdt_300 test */ bpf_link__destroy(skel->links.usdt_100); + + bss->usdt_100_called = 0; + bss->usdt_100_sum = 0; + + /* If built with arm64/clang, there will be much less number of specs + * for usdt_300 call sites. 
+ */ +#if !defined(__aarch64__) || !defined(__clang__) + /* we'll reuse usdt_100 BPF program for usdt_300 test */ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe", "test", "usdt_300", NULL); err = -errno; @@ -282,13 +290,11 @@ static void subtest_multispec_usdt(void) /* let's check that there are no "dangling" BPF programs attached due * to partial success of the above test:usdt_300 attachment */ - bss->usdt_100_called = 0; - bss->usdt_100_sum = 0; - f300(777); /* this is 301st instance of usdt_300 */ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called"); ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum"); +#endif /* This time we have USDT with 400 inlined invocations, but arg specs * should be the same across all sites, so libbpf will only need to diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index d424e7ecbd12..9fd3ae987321 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -21,8 +21,7 @@ #include "../progs/test_user_ringbuf.h" static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; -static const long c_ringbuf_size = 1 << 12; /* 1 small page */ -static const long c_max_entries = c_ringbuf_size / c_sample_size; +static long c_ringbuf_size, c_max_entries; static void drain_current_samples(void) { @@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void) uint32_t remaining_samples = total_samples; int err; - BUILD_BUG_ON(total_samples <= c_max_entries); + if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries")) + return; + err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; @@ -686,6 +687,9 @@ void test_user_ringbuf(void) { int i; + c_ringbuf_size = getpagesize(); /* 1 page */ + c_max_entries = c_ringbuf_size / c_sample_size; + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { if 
(!test__start_subtest(success_tests[i].test_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index c9da06741104..77ec95d4ffaa 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -85,6 +85,7 @@ #include "verifier_store_release.skel.h" #include "verifier_subprog_precision.skel.h" #include "verifier_subreg.skel.h" +#include "verifier_tailcall.skel.h" #include "verifier_tailcall_jit.skel.h" #include "verifier_typedef.skel.h" #include "verifier_uninit.skel.h" @@ -219,6 +220,7 @@ void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } void test_verifier_store_release(void) { RUN(verifier_store_release); } void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } void test_verifier_subreg(void) { RUN(verifier_subreg); } +void test_verifier_tailcall(void) { RUN(verifier_tailcall); } void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); } void test_verifier_typedef(void) { RUN(verifier_typedef); } void test_verifier_uninit(void) { RUN(verifier_uninit); } diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index ab0f02faa80c..4d69d9d55e17 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -268,7 +268,7 @@ static void test_verify_pkcs7_sig_from_map(void) char *tmp_dir; struct test_verify_pkcs7_sig *skel = NULL; struct bpf_map *map; - struct data data; + struct data data = {}; int ret, zero = 0; /* Trigger creation of session keyring. 
*/ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index b2b2d85dbb1b..43264347e7d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -static void test_xdp_adjust_tail_grow(void) +static void test_xdp_adjust_tail_grow(bool is_64k_pagesize) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; struct bpf_object *obj; - char buf[4096]; /* avoid segfault: large buf to hold grow results */ + char buf[8192]; /* avoid segfault: large buf to hold grow results */ __u32 expect_sz; int err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), .repeat = 1, ); + /* topts.data_size_in as a special signal to bpf prog */ + if (is_64k_pagesize) + topts.data_size_in = sizeof(pkt_v4) - 1; + else + topts.data_size_in = sizeof(pkt_v4); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; @@ -208,7 +213,7 @@ out: bpf_object__close(obj); } -static void test_xdp_adjust_frags_tail_grow(void) +static void test_xdp_adjust_frags_tail_grow_4k(void) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; __u32 exp_size; @@ -246,14 +251,20 @@ static void test_xdp_adjust_frags_tail_grow(void) ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval"); ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); - for (i = 0; i < 9000; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + for (i = 0; i < 9000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + } - for (i = 9000; i < 9010; i++) - ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + for (i = 9000; i < 9010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + } - for (i = 9010; i < 16384; i++) - ASSERT_EQ(buf[i], 
1, "9Kb+10b-untouched"); + for (i = 9010; i < 16384; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + } /* Test a too large grow */ memset(buf, 1, 16384); @@ -273,16 +284,93 @@ out: bpf_object__close(obj); } +static void test_xdp_adjust_frags_tail_grow_64k(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; + __u32 exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, i, prog_fd; + __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + goto out; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(262144); + if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb")) + goto out; + + /* Test case add 10 bytes to last frag */ + memset(buf, 1, 262144); + exp_size = 90000 + 10; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 90000; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + for (i = 0; i < 90000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-old"); + } + + for (i = 90000; i < 90010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "90Kb+10b-new"); + } + + for (i = 90010; i < 262144; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched"); + } + + /* Test a too large grow */ + memset(buf, 1, 262144); + exp_size = 90001; + + topts.data_in = topts.data_out = buf; + topts.data_size_in = 90001; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + free(buf); +out: + bpf_object__close(obj); +} + void test_xdp_adjust_tail(void) { + int page_size = getpagesize(); + if 
(test__start_subtest("xdp_adjust_tail_shrink")) test_xdp_adjust_tail_shrink(); if (test__start_subtest("xdp_adjust_tail_grow")) - test_xdp_adjust_tail_grow(); + test_xdp_adjust_tail_grow(page_size == 65536); if (test__start_subtest("xdp_adjust_tail_grow2")) test_xdp_adjust_tail_grow2(); if (test__start_subtest("xdp_adjust_frags_tail_shrink")) test_xdp_adjust_frags_tail_shrink(); - if (test__start_subtest("xdp_adjust_frags_tail_grow")) - test_xdp_adjust_frags_tail_grow(); + if (test__start_subtest("xdp_adjust_frags_tail_grow")) { + if (page_size == 65536) + test_xdp_adjust_frags_tail_grow_64k(); + else + test_xdp_adjust_frags_tail_grow_4k(); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 7dac044664ac..dd34b0cc4b4e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -66,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd) #else #define MAX_PKT_SIZE 3408 #endif + +#define PAGE_SIZE_4K 4096 +#define PAGE_SIZE_64K 65536 + static void test_max_pkt_size(int fd) { - char data[MAX_PKT_SIZE + 1] = {}; + char data[PAGE_SIZE_64K + 1] = {}; int err; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = MAX_PKT_SIZE, .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); + + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K; + else + opts.data_size_in = MAX_PKT_SIZE; + err = bpf_prog_test_run_opts(fd, &opts); ASSERT_OK(err, "prog_run_max_size"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c new file mode 100644 index 000000000000..2f20485e0de3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> 
+#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u32 value_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 value = 0; + + if (ctx->value == (void *)0) + return 0; + + bpf_probe_read_kernel(&value, sizeof(value), ctx->value); + value_sum += value; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 6e208e24ba3b..530752ddde8e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -83,9 +83,11 @@ * expect return value to match passed parameter: * - a decimal number * - a hexadecimal number, when starts from 0x - * - literal INT_MIN - * - literal POINTER_VALUE (see definition below) - * - literal TEST_DATA_LEN (see definition below) + * - a macro which expands to one of the above + * - literal _INT_MIN (expands to INT_MIN) + * In addition, two special macros are defined below: + * - POINTER_VALUE + * - TEST_DATA_LEN * __retval_unpriv Same, but load program in unprivileged mode. 
* * __description Text to be used instead of a program name for display @@ -125,8 +127,8 @@ #define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) #define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag))) -#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val))) -#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val))) +#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val)))) +#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val)))) #define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary"))) #define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv"))) #define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path))) @@ -155,7 +157,7 @@ #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr)) /* Magic constants used with __retval() */ -#define POINTER_VALUE 0xcafe4all +#define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 #ifndef __used @@ -231,4 +233,12 @@ #define CAN_USE_LOAD_ACQ_STORE_REL #endif +#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) +#define SPEC_V1 +#endif + +#if defined(__TARGET_ARCH_x86) +#define SPEC_V4 +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c new file mode 100644 index 000000000000..6a0ea02c4de2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("cgroup/getsockopt") +int getsockopt_1(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_2(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_3(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_4(struct bpf_sockopt *ctx) +{ + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c index f3d79aecbf93..6884ab99a421 100644 --- a/tools/testing/selftests/bpf/progs/compute_live_registers.c +++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c @@ -240,6 +240,22 @@ __naked void if2(void) ::: __clobber_all); } +/* Verifier misses that r2 is alive if jset is not handled properly */ +SEC("socket") +__log_level(2) +__msg("2: 012....... (45) if r1 & 0x7 goto pc+1") +__naked void if3_jset_bug(void) +{ + asm volatile ( + "r0 = 1;" + "r2 = 2;" + "if r1 & 0x7 goto +1;" + "exit;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + SEC("socket") __log_level(2) __msg("0: .......... 
(b7) r1 = 0") diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index a0391f9da2d4..8315273cb900 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -9,6 +9,8 @@ #include "bpf_misc.h" #include "errno.h" +#define PAGE_SIZE_64K 65536 + char _license[] SEC("license") = "GPL"; int pid, err, val; @@ -611,11 +613,12 @@ int test_dynptr_copy_xdp(struct xdp_md *xdp) struct bpf_dynptr ptr_buf, ptr_xdp; char data[] = "qwertyuiopasdfghjkl"; char buf[32] = {'\0'}; - __u32 len = sizeof(data); + __u32 len = sizeof(data), xdp_data_size; int i, chunks = 200; /* ptr_xdp is backed by non-contiguous memory */ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + xdp_data_size = bpf_dynptr_size(&ptr_xdp); bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf); /* Destination dynptr is backed by non-contiguous memory */ @@ -673,7 +676,7 @@ int test_dynptr_copy_xdp(struct xdp_md *xdp) goto out; } - if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG) + if (bpf_dynptr_copy(&ptr_xdp, xdp_data_size - 3000, &ptr_xdp, 0, len * chunks) != -E2BIG) err = 1; out: @@ -681,6 +684,173 @@ out: return XDP_DROP; } +char memset_zero_data[] = "data to be zeroed"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero(void *ctx) +{ + __u32 data_sz = sizeof(memset_zero_data); + char zeroes[32] = {'\0'}; + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, 0); + err = err ?: bpf_memcmp(zeroes, memset_zero_data, data_sz); + + return 0; +} + +#define DYNPTR_MEMSET_VAL 42 + +char memset_notzero_data[] = "data to be overwritten"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_notzero(void *ctx) +{ + u32 data_sz = sizeof(memset_notzero_data); + struct bpf_dynptr ptr; + char expected[32]; + + __builtin_memset(expected, 
DYNPTR_MEMSET_VAL, data_sz); + + err = bpf_dynptr_from_mem(memset_notzero_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, DYNPTR_MEMSET_VAL); + err = err ?: bpf_memcmp(expected, memset_notzero_data, data_sz); + + return 0; +} + +char memset_zero_offset_data[] = "data to be zeroed partially"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero_offset(void *ctx) +{ + char expected[] = "data to \0\0\0\0eroed partially"; + __u32 data_sz = sizeof(memset_zero_offset_data); + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_offset_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 8, 4, 0); + err = err ?: bpf_memcmp(expected, memset_zero_offset_data, data_sz); + + return 0; +} + +char memset_zero_adjusted_data[] = "data to be zeroed partially"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero_adjusted(void *ctx) +{ + char expected[] = "data\0\0\0\0be zeroed partially"; + __u32 data_sz = sizeof(memset_zero_adjusted_data); + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_adjusted_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_adjust(&ptr, 4, 8); + err = err ?: bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0); + err = err ?: bpf_memcmp(expected, memset_zero_adjusted_data, data_sz); + + return 0; +} + +char memset_overflow_data[] = "memset overflow data"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_overflow(void *ctx) +{ + __u32 data_sz = sizeof(memset_overflow_data); + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr); + ret = bpf_dynptr_memset(&ptr, 0, data_sz + 1, 0); + if (ret != -E2BIG) + err = 1; + + return 0; +} + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_overflow_offset(void *ctx) +{ + __u32 data_sz = sizeof(memset_overflow_data); + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr); + ret = 
bpf_dynptr_memset(&ptr, 1, data_sz, 0); + if (ret != -E2BIG) + err = 1; + + return 0; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_memset_readonly(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_skb(skb, 0, &ptr); + + /* cgroup skbs are read only, memset should fail */ + ret = bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0); + if (ret != -EINVAL) + err = 1; + + return 0; +} + +#define min_t(type, x, y) ({ \ + type __x = (x); \ + type __y = (y); \ + __x < __y ? __x : __y; }) + +SEC("xdp") +int test_dynptr_memset_xdp_chunks(struct xdp_md *xdp) +{ + u32 data_sz, chunk_sz, offset = 0; + const int max_chunks = 200; + struct bpf_dynptr ptr_xdp; + char expected_buf[32]; + char buf[32]; + int i; + + __builtin_memset(expected_buf, DYNPTR_MEMSET_VAL, sizeof(expected_buf)); + + /* ptr_xdp is backed by non-contiguous memory */ + bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + data_sz = bpf_dynptr_size(&ptr_xdp); + + err = bpf_dynptr_memset(&ptr_xdp, 0, data_sz, DYNPTR_MEMSET_VAL); + if (err) { + /* bpf_dynptr_memset() eventually called bpf_xdp_pointer() + * where if data_sz is greater than 0xffff, -EFAULT will be + * returned. For 64K page size, data_sz is greater than + * 64K, so error is expected and let us zero out error and + * return success. + */ + if (data_sz >= PAGE_SIZE_64K) + err = 0; + goto out; + } + + bpf_for(i, 0, max_chunks) { + offset = i * sizeof(buf); + if (offset >= data_sz) + goto out; + chunk_sz = min_t(u32, sizeof(buf), data_sz - offset); + err = bpf_dynptr_read(&buf, chunk_sz, &ptr_xdp, offset, 0); + if (err) + goto out; + err = bpf_memcmp(buf, expected_buf, sizeof(buf)); + if (err) + goto out; + } +out: + return XDP_DROP; +} + void *user_ptr; /* Contains the copy of the data pointed by user_ptr. 
* Size 384 to make it not fit into a single kernel chunk when copying diff --git a/tools/testing/selftests/bpf/progs/fexit_noreturns.c b/tools/testing/selftests/bpf/progs/fexit_noreturns.c deleted file mode 100644 index 54654539f550..000000000000 --- a/tools/testing/selftests/bpf/progs/fexit_noreturns.c +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> -#include "bpf_misc.h" - -char _license[] SEC("license") = "GPL"; - -SEC("fexit/do_exit") -__failure __msg("Attaching fexit/fmod_ret to __noreturn functions is rejected.") -int BPF_PROG(noreturns) -{ - return 0; -} diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 76adf4a8f2da..7dd92a303bf6 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1649,4 +1649,281 @@ int clean_live_states(const void *ctx) return 0; } +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state(void) +{ + /* This is equivalent to C program below. 
+ * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "call noop;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int noop(void) +{ + asm volatile ( + "r0 = 0;" + "exit;" + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state2(void) +{ + /* This is equivalent to C program below. 
+ * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) { + * r6 = -31; + * jump_into_loop: + * goto +0; + * goto loop; + * } + * if (unlikely(bpf_get_prandom_u32())) + * goto jump_into_loop; + * loop: + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto jump_into_loop_%=;" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "jump_into_loop_%=: " + "goto +0;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state3(void) +{ + /* + * bpf_iter_num_new(&fp[-8], 0, 10) + * loop1(-32, &fp[-8]) + * loop1_wrapper(&fp[-8]) + * bpf_iter_num_destroy(&fp[-8]) + */ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + /* call #1 */ + "r1 = -32;" + "r2 = r10;" + "r2 += -8;" + "call loop1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + /* call #2 */ + "r1 = r10;" + "r1 += -8;" + "r2 = 
0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r1 = r10;" + "r1 += -8;" + "call loop1_wrapper;" + /* return */ + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int loop1(void) +{ + /* + * int loop1(num, iter) { + * r6 = num; + * r7 = iter; + * while (bpf_iter_num_next(r7)) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * return 0 + * } + */ + asm volatile ( + "r6 = r1;" + "r7 = r2;" + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "loop_%=:" + "r1 = r7;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_next), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int loop1_wrapper(void) +{ + /* + * int loop1_wrapper(iter) { + * r6 = -32; + * r7 = iter; + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * loop1(r6, r7); + * return 0; + * } + */ + asm volatile ( + "r6 = -32;" + "r7 = r1;" + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "r1 = r6;" + "r2 = r7;" + "call loop1;" + "r0 = 0;" + "exit;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_next), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..4f94c971ae86 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include 
<vmlinux.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r8 = *(u64 *)(r7 +0) ; R7_w=ptr_nameidata(off={{[0-9]+}}) R8_w=rdonly_untrusted_mem(sz=0)") +__msg("r9 = *(u8 *)(r8 +0) ; R8_w=rdonly_untrusted_mem(sz=0) R9_w=scalar") +int btf_id_to_ptr_mem(void *ctx) +{ + struct task_struct *task; + struct nameidata *idata; + u64 ret, off; + + task = bpf_get_current_task_btf(); + idata = task->nameidata; + off = bpf_core_field_offset(struct nameidata, pathname); + /* + * asm block to have reliable match target for __msg, equivalent of: + * ret = task->nameidata->pathname[0]; + */ + asm volatile ( + "r7 = %[idata];" + "r7 += %[off];" + "r8 = *(u64 *)(r7 + 0);" + "r9 = *(u8 *)(r8 + 0);" + "%[ret] = r9;" + : [ret]"=r"(ret) + : [idata]"r"(idata), + [off]"r"(off) + : "r7", "r8", "r9"); + return ret; +} + +SEC("socket") +__success +__retval(0) +int ldx_is_ok_bad_addr(void *ctx) +{ + char *p; + + if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID)) + return 42; + + p = bpf_rdonly_cast(0, 0); + return p[0x7fff]; +} + +SEC("socket") +__success +__retval(1) +int ldx_is_ok_good_addr(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + return *p; +} + +SEC("socket") +__success +int offset_not_tracked(void *ctx) +{ + int *p, i, s; + + p = bpf_rdonly_cast(0, 0); + s = 0; + bpf_for(i, 0, 1000 * 1000 * 1000) { + p++; + s += *p; + } + return s; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int stx_not_ok(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + *p = 1; + return 0; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int atomic_not_ok(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + __sync_fetch_and_add(p, 1); + return 0; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int 
atomic_rmw_not_ok(void *ctx) +{ + long v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + return __sync_val_compare_and_swap(p, 0, 42); +} + +SEC("socket") +__failure +__msg("invalid access to memory, mem_size=0 off=0 size=4") +__msg("R1 min value is outside of the allowed memory range") +int kfunc_param_not_ok(void *ctx) +{ + int *p; + + p = bpf_rdonly_cast(0, 0); + bpf_kfunc_trusted_num_test(p); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure +__msg("R1 type=rdonly_untrusted_mem expected=") +int helper_param_not_ok(void *ctx) +{ + char *p; + + p = bpf_rdonly_cast(0, 0); + /* + * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do, + * the most permissive constraint + */ + bpf_copy_from_user(p, 0, (void *)42); + return 0; +} + +static __noinline u64 *get_some_addr(void) +{ + if (bpf_get_prandom_u32()) + return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock)); + else + return bpf_rdonly_cast(0, 0); +} + +SEC("socket") +__success +__retval(0) +int mixed_mem_type(void *ctx) +{ + u64 *p; + + /* Try to avoid compiler hoisting load to if branches by using __noinline func. 
*/ + p = get_some_addr(); + return *p; +} + +__attribute__((__aligned__(8))) +u8 global[] = { + 0x11, 0x22, 0x33, 0x44, + 0x55, 0x66, 0x77, 0x88, + 0x99 +}; + +__always_inline +static u64 combine(void *p) +{ + u64 acc; + + acc = 0; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + acc |= (*(u64 *)p >> 56) << 24; + acc |= (*(u32 *)p >> 24) << 16; + acc |= (*(u16 *)p >> 8) << 8; + acc |= *(u8 *)p; +#else + acc |= (*(u64 *)p & 0xff) << 24; + acc |= (*(u32 *)p & 0xff) << 16; + acc |= (*(u16 *)p & 0xff) << 8; + acc |= *(u8 *)p; +#endif + return acc; +} + +SEC("socket") +__retval(0x88442211) +int diff_size_access(void *ctx) +{ + return combine(bpf_rdonly_cast(&global, 0)); +} + +SEC("socket") +__retval(0x99553322) +int misaligned_access(void *ctx) +{ + return combine(bpf_rdonly_cast(&global, 0) + 1); +} + +__weak int return_one(void) +{ + return 1; +} + +SEC("socket") +__success +__retval(1) +int null_check(void *ctx) +{ + int *p; + + p = bpf_rdonly_cast(0, 0); + if (p == 0) + /* make this a function call to avoid compiler + * moving r0 assignment before check. 
+ */ + return return_one(); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index a3620c15c136..49fe93d7e059 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -61,19 +61,19 @@ static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock) } m->key = 1; - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_rbtree_add(&groot, &m->node, less); - bpf_spin_unlock(&glock); + bpf_spin_lock(lock); + bpf_rbtree_add(root, &n->node, less); + bpf_rbtree_add(root, &m->node, less); + bpf_spin_unlock(lock); n = bpf_obj_new(typeof(*n)); if (!n) return 3; n->key = 3; - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_spin_unlock(&glock); + bpf_spin_lock(lock); + bpf_rbtree_add(root, &n->node, less); + bpf_spin_unlock(lock); return 0; } diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 43637ee2cdcd..3a868a199349 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -16,10 +16,11 @@ struct { __type(value, long); } map_a SEC(".maps"); -__u32 user_data, key_serial, target_pid; +__u32 user_data, target_pid; +__s32 key_serial; __u64 flags, task_storage_val, cgroup_id; -struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; void bpf_key_put(struct bpf_key *key) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c index 855f85fc5522..405adbe5e8b0 100644 --- a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c +++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c @@ -17,8 +17,8 
@@ static const char expected_value_b[] = "bpf_selftest_value_b"; bool found_value_a; bool found_value_b; -SEC("lsm.s/socket_connect") -int BPF_PROG(test_socket_connect) +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open) { u64 cgrp_id = bpf_get_current_cgroup_id(); struct cgroup_subsys_state *css, *tmp; diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c new file mode 100644 index 000000000000..7216b3450e96 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +#define EPERM 1 /* Operation not permitted */ + +/* From include/linux/mm.h. */ +#define FMODE_WRITE 0x2 + +struct map; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} prot_status_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} prot_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} not_prot_map SEC(".maps"); + +SEC("fmod_ret/security_bpf_map") +int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode) +{ + __u32 key = 0; + __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key); + + if (!status_ptr || !*status_ptr) + return 0; + + if (map == &prot_map) { + /* Allow read-only access */ + if (fmode & FMODE_WRITE) + return -EPERM; + } + + return 0; +} + +/* + * This program keeps references to maps. This is needed to prevent + * optimizing them out. 
+ */ +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(fentry_dummy1, int a) +{ + __u32 key = 0; + __u32 val1 = a; + __u32 val2 = a + 1; + + bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY); + bpf_map_update_elem(&not_prot_map, &key, &val2, BPF_ANY); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c index 90f5656c3991..ebaef28b2cb3 100644 --- a/tools/testing/selftests/bpf/progs/set_global_vars.c +++ b/tools/testing/selftests/bpf/progs/set_global_vars.c @@ -7,22 +7,30 @@ char _license[] SEC("license") = "GPL"; -enum Enum { EA1 = 0, EA2 = 11 }; +typedef __s32 s32; +typedef s32 i32; +typedef __u8 u8; + +enum Enum { EA1 = 0, EA2 = 11, EA3 = 10 }; enum Enumu64 {EB1 = 0llu, EB2 = 12llu }; enum Enums64 { EC1 = 0ll, EC2 = 13ll }; const volatile __s64 var_s64 = -1; const volatile __u64 var_u64 = 0; -const volatile __s32 var_s32 = -1; +const volatile i32 var_s32 = -1; const volatile __u32 var_u32 = 0; const volatile __s16 var_s16 = -1; const volatile __u16 var_u16 = 0; const volatile __s8 var_s8 = -1; -const volatile __u8 var_u8 = 0; +const volatile u8 var_u8 = 0; const volatile enum Enum var_ea = EA1; const volatile enum Enumu64 var_eb = EB1; const volatile enum Enums64 var_ec = EC1; const volatile bool var_b = false; +const volatile i32 arr[32]; +const volatile enum Enum enum_arr[32]; +const volatile i32 three_d[47][19][17]; +const volatile i32 *ptr_arr[32]; struct Struct { int:16; @@ -35,34 +43,38 @@ struct Struct { volatile struct { const int:1; union { - const volatile __u8 var_u8; + const volatile u8 var_u8[3]; const volatile __s16 filler3; const int:1; + s32 mat[7][5]; } u; }; - } struct2; + } struct2[2][4]; }; const volatile __u32 stru = 0; /* same prefix as below */ -const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}}; +const volatile struct Struct struct1[3]; +const volatile struct Struct struct11[11][7]; -union Union { - __u16 var_u16; - struct Struct3 { - struct 
{ - __u8 var_u8_l; - }; +struct Struct3 { + struct { + u8 var_u8_l; + }; + struct { struct { - struct { - __u8 var_u8_h; - }; + u8 var_u8_h; }; - } struct3; + }; }; -const volatile union Union union1 = {.var_u16 = -1}; +typedef struct Struct3 Struct3_t; -char arr[4] = {0}; +union Union { + __u16 var_u16; + Struct3_t struct3; +}; + +const volatile union Union union1 = {.var_u16 = -1}; SEC("socket") int test_set_globals(void *ctx) @@ -81,8 +93,14 @@ int test_set_globals(void *ctx) a = var_eb; a = var_ec; a = var_b; - a = struct1.struct2.u.var_u8; + a = struct1[2].struct2[1][2].u.var_u8[2]; a = union1.var_u16; + a = arr[3]; + a = arr[EA2]; + a = enum_arr[EC2]; + a = three_d[31][7][EA2]; + a = struct1[2].struct2[1][2].u.mat[5][3]; + a = struct11[7][5].struct2[0][1].u.mat[3][0]; return a; } diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index 8f483337e103..77966ded5467 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -23,6 +23,7 @@ static bool ipv4_addr_loopback(__be32 a) } volatile const unsigned int sf; +volatile const unsigned int ss; volatile const __u16 ports[2]; unsigned int bucket[2]; @@ -42,16 +43,18 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != sf || - sk->sk_state != TCP_LISTEN || - sk->sk_family == AF_INET6 ? + (ss && sk->sk_state != ss) || + (sk->sk_family == AF_INET6 ? 
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : - !ipv4_addr_loopback(sk->sk_rcv_saddr)) + !ipv4_addr_loopback(sk->sk_rcv_saddr))) return 0; if (sk->sk_num == ports[0]) idx = 0; else if (sk->sk_num == ports[1]) idx = 1; + else if (!ports[0] && !ports[1]) + idx = 0; else return 0; @@ -67,6 +70,27 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) return 0; } +volatile const __u64 destroy_cookie; + +SEC("iter/tcp") +int iter_tcp_destroy(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = (struct sock_common *)ctx->sk_common; + __u64 sock_cookie; + + if (!sk_common) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk_common); + if (sock_cookie != destroy_cookie) + return 0; + + bpf_sock_destroy(sk_common); + bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie)); + + return 0; +} + #define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk) SEC("iter/udp") @@ -83,15 +107,17 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != sf || - sk->sk_family == AF_INET6 ? + (sk->sk_family == AF_INET6 ? !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : - !ipv4_addr_loopback(sk->sk_rcv_saddr)) + !ipv4_addr_loopback(sk->sk_rcv_saddr))) return 0; if (sk->sk_num == ports[0]) idx = 0; else if (sk->sk_num == ports[1]) idx = 1; + else if (!ports[0] && !ports[1]) + idx = 0; else return 0; diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c new file mode 100644 index 000000000000..35790897dc87 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stream.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct arr_elem { + struct bpf_res_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct arr_elem); +} arrmap SEC(".maps"); + +#define ENOSPC 28 +#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +int size; + +SEC("syscall") +__success __retval(0) +int stream_exhaust(void *ctx) +{ + /* Use global variable for loop convergence. */ + size = 0; + bpf_repeat(BPF_MAX_LOOPS) { + if (bpf_stream_printk(BPF_STDOUT, _STR) == -ENOSPC && size == 99954) + return 0; + size += sizeof(_STR) - 1; + } + return 1; +} + +SEC("syscall") +__success __retval(0) +int stream_cond_break(void *ctx) +{ + while (can_loop) + ; + return 0; +} + +SEC("syscall") +__success __retval(0) +int stream_deadlock(void *ctx) +{ + struct bpf_res_spin_lock *lock, *nlock; + + lock = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!lock) + return 1; + nlock = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!nlock) + return 1; + if (bpf_res_spin_lock(lock)) + return 1; + if (bpf_res_spin_lock(nlock)) { + bpf_res_spin_unlock(lock); + return 0; + } + bpf_res_spin_unlock(nlock); + bpf_res_spin_unlock(lock); + return 1; +} + +SEC("syscall") +__success __retval(0) +int stream_syscall(void *ctx) +{ + bpf_stream_printk(BPF_STDOUT, "foo"); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c new file mode 100644 index 000000000000..b4a0d0cc8ec8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +SEC("syscall") +__failure __msg("Possibly NULL pointer passed") +int stream_vprintk_null_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL); + return 0; +} + +SEC("syscall") +__failure __msg("R3 type=scalar expected=") +int stream_vprintk_scalar_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL); + return 0; +} + +SEC("syscall") +__failure __msg("arg#1 doesn't point to a const string") +int stream_vprintk_string_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c new file mode 100644 index 000000000000..53af438bd998 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <linux/limits.h> +#include "bpf_misc.h" +#include "errno.h" + +char *user_ptr = (char *)1; +char *invalid_kern_ptr = (char *)-1; + +/* + * When passing userspace pointers, the error code differs based on arch: + * -ERANGE on arches with non-overlapping address spaces + * -EFAULT on other arches + */ +#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \ + defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86) +#define USER_PTR_ERR -ERANGE +#else +#define USER_PTR_ERR -EFAULT +#endif + +/* + * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and + * user_ptr (instead of causing an exception) so the below two groups of tests + * are not applicable. 
+ */ +#ifndef __TARGET_ARCH_s390 + +/* Passing NULL to string kfuncs (treated as a userspace ptr) */ +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); } + +/* Passing userspace ptr to string kfuncs */ +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, 
"hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); } + +#endif /* __TARGET_ARCH_s390 */ + +/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ +SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return 
bpf_strcmp(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); } +SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); } +SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c 
b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c new file mode 100644 index 000000000000..89fb4669b0e9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <linux/limits.h> + +char long_str[XATTR_SIZE_MAX + 1]; + +SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); } +SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); } +SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); } +SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); } +SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); } +SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); } +SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); } +SEC("syscall") int test_strspn_str_too_long(void *ctx) { return bpf_strspn(long_str, "a"); } +SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); } +SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); } +SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); } +SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); } +SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c new file mode 100644 index 000000000000..46697f381878 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -0,0 +1,37 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "errno.h" + +char str[] = "hello world"; + +#define __test(retval) SEC("syscall") __success __retval(retval) + +/* Functional tests */ +__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } +__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); } +__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } +__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); } +__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } +__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); } +__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); } +__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); } +__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); } +__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); } +__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); } +__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); } +__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); } +__test(11) int test_strlen(void *ctx) { return bpf_strlen(str); } +__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); } +__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); } +__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } +__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } +__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } +__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } +__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); } 
+__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c index 0e4d2ff63ab8..dbe646013811 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip __attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c index 58d5d8dc2235..3d89ad7cbe2a 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip __attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c index 31e58389bb8b..b1f6d7e5a8e5 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip __attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c 
b/tools/testing/selftests/bpf/progs/test_lookup_key.c index cdbbb12f1491..1f7e1e59b073 100644 --- a/tools/testing/selftests/bpf/progs/test_lookup_key.c +++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c @@ -14,11 +14,11 @@ char _license[] SEC("license") = "GPL"; __u32 monitored_pid; -__u32 key_serial; +__s32 key_serial; __u32 key_id; __u64 flags; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c index 350513c0e4c9..f063a0013f85 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx) if (cur_pid != pid) return 0; - sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample1) return 0; /* first one can pass */ - sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample2) { bpf_ringbuf_discard(sample1, 0); __sync_fetch_and_add(&discarded, 1); diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c index 8ef6b39335b6..34b30e2603f0 100644 --- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c +++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c @@ -40,7 +40,7 @@ char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; __u32 monitored_pid; char sig[MAX_SIG_SIZE]; __u32 sig_size; -__u32 user_keyring_serial; +__s32 user_keyring_serial; SEC("lsm.s/file_open") int BPF_PROG(test_file_open, struct file *f) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c 
b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c index 2796dd8545eb..1c7941a4ad00 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c @@ -1,8 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 ByteDance */ -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +#endif +#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2) + struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 1); @@ -31,7 +36,7 @@ int prog_skb_verdict(struct __sk_buff *skb) change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0); return SK_PASS; } else if (data[0] == 'E') { /* Error */ - change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0); return SK_PASS; } return SK_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c index 8bdb9987c0c7..83df4919c224 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c @@ -7,6 +7,8 @@ int cork_byte; int push_start; int push_end; int apply_bytes; +int pop_start; +int pop_end; struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); @@ -22,6 +24,8 @@ int prog_sk_policy(struct sk_msg_md *msg) bpf_msg_cork_bytes(msg, cork_byte); if (push_start > 0 && push_end > 0) bpf_msg_push_data(msg, push_start, push_end, 0); + if (pop_start >= 0 && pop_end > 0) + bpf_msg_pop_data(msg, pop_start, pop_end, 0); return SK_PASS; } diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c index 28edafe803f0..fcba8299f0bc 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c +++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/bpf.h> 
+#include "vmlinux.h" #include <bpf/bpf_helpers.h> -#include <linux/if_ether.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/pkt_cls.h> + +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +#endif +#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2) long change_tail_ret = 1; @@ -94,7 +94,7 @@ int change_tail(struct __sk_buff *skb) bpf_skb_change_tail(skb, len, 0); return TCX_PASS; } else if (payload[0] == 'E') { /* Error */ - change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0); return TCX_PASS; } else if (payload[0] == 'Z') { /* Zero */ change_tail_ret = bpf_skb_change_tail(skb, 0, 0); diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index e96d09e11115..ff8d755548b9 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -17,7 +17,7 @@ #define MAX_SIG_SIZE 1024 __u32 monitored_pid; -__u32 user_keyring_serial; +__s32 user_keyring_serial; __u64 system_keyring_id; struct data { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index dc74d8cf9e3f..5904f45cfbc4 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -19,7 +19,9 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) /* Data length determine test case */ if (data_len == 54) { /* sizeof(pkt_v4) */ - offset = 4096; /* test too large offset */ + offset = 4096; /* test too large offset, 4k page size */ + } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */ + offset = 65536; /* test too large offset, 64k page size */ } else if (data_len == 74) { /* sizeof(pkt_v6) */ offset = 40; } else if (data_len == 64) { @@ -31,6 +33,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) offset = 10; 
} else if (data_len == 9001) { offset = 4096; + } else if (data_len == 90000) { + offset = 10; /* test a small offset, 64k page size */ + } else if (data_len == 90001) { + offset = 65536; /* test too large offset, 64k page size */ } else { return XDP_ABORTED; /* No matching test */ } diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c index d41665d2ec8c..65e485c4468c 100644 --- a/tools/testing/selftests/bpf/progs/tracing_failure.c +++ b/tools/testing/selftests/bpf/progs/tracing_failure.c @@ -18,3 +18,15 @@ int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock) { return 0; } + +SEC("?fentry/__rcu_read_lock") +int BPF_PROG(tracing_deny) +{ + return 0; +} + +SEC("?fexit/do_exit") +int BPF_PROG(fexit_noreturns) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c index e97e518516b6..2b4fdca162be 100644 --- a/tools/testing/selftests/bpf/progs/verifier_and.c +++ b/tools/testing/selftests/bpf/progs/verifier_and.c @@ -85,8 +85,14 @@ l0_%=: r0 = r0; \ SEC("socket") __description("check known subreg with unknown reg") -__success __failure_unpriv __msg_unpriv("R1 !read_ok") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w0 < 0x1 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */ +__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void known_subreg_with_unknown_reg(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 67509c5d3982..7f4827eede3c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -3,6 +3,7 @@ #define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> 
#include "bpf_misc.h" @@ -114,6 +115,111 @@ int basic_alloc3(void *ctx) return 0; } +SEC("syscall") +__success __retval(0) +int basic_reserve1(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page) + return 1; + + page += __PAGE_SIZE; + + /* Reserve the second page */ + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 2; + + /* Try to explicitly allocate the reserved page. */ + page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0); + if (page) + return 3; + + /* Try to implicitly allocate the page (since there's only 2 of them). */ + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (page) + return 4; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int basic_reserve2(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = arena_base(&arena); + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 1; + + page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0); + if ((u64)page) + return 2; +#endif + return 0; +} + +/* Reserve the same page twice, should return -EBUSY. */ +SEC("syscall") +__success __retval(0) +int reserve_twice(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = arena_base(&arena); + + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 1; + + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret != -EBUSY) + return 2; +#endif + return 0; +} + +/* Try to reserve past the end of the arena. */ +SEC("syscall") +__success __retval(0) +int reserve_invalid_region(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + /* Try a NULL pointer. 
*/ + ret = bpf_arena_reserve_pages(&arena, NULL, 3); + if (ret != -EINVAL) + return 1; + + page = arena_base(&arena); + + ret = bpf_arena_reserve_pages(&arena, page, 3); + if (ret != -EINVAL) + return 2; + + ret = bpf_arena_reserve_pages(&arena, page, 4096); + if (ret != -EINVAL) + return 3; + + ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1); + if (ret != -EINVAL) + return 4; +#endif + return 0; +} + SEC("iter.s/bpf_map") __success __log_level(2) int iter_maps1(struct bpf_iter__bpf_map *ctx) diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index f94f30cf1bb8..9dbdf123542d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -67,6 +67,104 @@ int big_alloc1(void *ctx) return 0; } +/* Try to access a reserved page. Behavior should be identical with accessing unallocated pages. */ +SEC("syscall") +__success __retval(0) +int access_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page; + char __arena *base; + const size_t len = 4; + int ret, i; + + /* Get a separate region of the arena. */ + page = base = arena_base(&arena) + 16384 * PAGE_SIZE; + + ret = bpf_arena_reserve_pages(&arena, base, len); + if (ret) + return 1; + + /* Try to dirty reserved memory. */ + for (i = 0; i < len && can_loop; i++) + *page = 0x5a; + + for (i = 0; i < len && can_loop; i++) { + page = (volatile char __arena *)(base + i * PAGE_SIZE); + + /* + * Error out in case either the write went through, + * or the address has random garbage. + */ + if (*page == 0x5a) + return 2 + 2 * i; + + if (*page) + return 2 + 2 * i + 1; + } +#endif + return 0; +} + +/* Try to allocate a region overlapping with a reservation. 
*/ +SEC("syscall") +__success __retval(0) +int request_partially_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page; + char __arena *base; + int ret; + + /* Add an arbitrary page offset. */ + page = base = arena_base(&arena) + 4096 * __PAGE_SIZE; + + ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4); + if (ret) + return 1; + + page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0); + if ((u64)page != 0ULL) + return 2; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int free_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *addr; + char __arena *page; + int ret; + + /* Add an arbitrary page offset. */ + addr = arena_base(&arena) + 32768 * __PAGE_SIZE; + + page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0); + if (!page) + return 1; + + ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2); + if (ret) + return 2; + + /* + * Reserved and allocated pages should be interchangeable for + * bpf_arena_free_pages(). Free a reserved and an allocated + * page with a single call. + */ + bpf_arena_free_pages(&arena, addr + __PAGE_SIZE , 2); + + /* The free call above should have succeeded, so this allocation should too. 
*/ + page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0); + if (!page) + return 3; +#endif + return 0; +} + #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) #define PAGE_CNT 100 __u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */ diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 0eb33bb801b5..87a2c60d86e6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -2,6 +2,7 @@ /* Converted from tools/testing/selftests/bpf/verifier/bounds.c */ #include <linux/bpf.h> +#include <../../../include/linux/filter.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" @@ -620,8 +621,14 @@ l1_%=: exit; \ SEC("socket") __description("bounds check mixed 32bit and 64bit arithmetic. test1") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void _32bit_and_64bit_arithmetic_test1(void) { asm volatile (" \ @@ -643,8 +650,14 @@ l1_%=: exit; \ SEC("socket") __description("bounds check mixed 32bit and 64bit arithmetic. 
test2") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void _32bit_and_64bit_arithmetic_test2(void) { asm volatile (" \ @@ -691,9 +704,14 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg = 0, reg xor 1") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 != 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_0_reg_xor_1(void) { asm volatile (" \ @@ -719,9 +737,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg32 = 0, reg32 xor 1") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w1 != 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg32_0_reg32_xor_1(void) { asm volatile (" \ @@ -747,9 +770,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg = 2, reg xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 > 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ 
+__xlated_unpriv("r0 = 0") +#endif __naked void reg_2_reg_xor_3(void) { asm volatile (" \ @@ -829,9 +857,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg > 0, reg xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 >= 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_0_reg_xor_3(void) { asm volatile (" \ @@ -858,9 +891,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg32 > 0, reg32 xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w1 >= 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg32_0_reg32_xor_3(void) { asm volatile (" \ @@ -1028,7 +1066,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ +__flag(BPF_F_TEST_REG_INVARIANTS) __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1334,4 +1372,300 @@ __naked void mult_sign_ovf(void) __imm(bpf_skb_store_bytes) : __clobber_all); } + +SEC("socket") +__description("64-bit addition, all outcomes overflow") +__success __log_level(2) +__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") +__retval(0) +__naked void add64_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 0xa000000000000000 ll;" + "r3 |= r4;" + 
"r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit addition, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()") +__retval(0) +__naked void add64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 2;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition overflow, all outcomes overflow") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 0xa0000000;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 2;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtraction, all outcomes underflow") +__success __log_level(2) +__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)") +__retval(0) +__naked void sub64_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 0x8000000000000000 ll;" + "r1 |= r2;" + "r3 = 0;" + "r3 -= r1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtraction, partial overflow, result in unbounded 
reg") +__success __log_level(2) +__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()") +__retval(0) +__naked void sub64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r3 = r0;" + "r2 = 1;" + "r3 -= r2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtraction overflow, all outcomes underflow") +__success __log_level(2) +__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w1 = w0;" + "w2 = 0x80000000;" + "w1 |= w2;" + "w3 = 0;" + "w3 -= w1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w3 = w0;" + "w2 = 1;" + "w3 -= w2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("dead branch on jset, does not result in invariants violation error") +__success __log_level(2) +__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS) +__naked void jset_range_analysis(void) +{ + asm volatile (" \ + call %[bpf_get_netns_cookie]; \ + if r0 == 0 goto l0_%=; \ + if r0 & 0xffffffff goto +0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_netns_cookie) + : __clobber_all); +} + +/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges + * overlap on the negative side. 
At instruction 7, the ranges look as follows: + * + * 0 umin=0xfffffcf1 umax=0xff..ff6e U64_MAX + * | [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxxxx] [xxxxxxxxxxxx| + * 0 smax=0xeffffeee smin=-655 -1 + * + * We should therefore deduce the following new bounds: + * + * 0 u64=[0xff..ffd71;0xff..ff6e] U64_MAX + * | [xxx] | + * |----------------------------|------------------------------| + * | [xxx] | + * 0 s64=[-655;-146] -1 + * + * Without the deduction cross sign boundary, we end up with an invariant + * violation error. + */ +SEC("socket") +__description("bounds deduction cross sign boundary, negative overlap") +__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=smin32=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,umin32=0xfffffd71,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))") +__retval(0) +__naked void bounds_deduct_negative_overlap(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w3 = w0; \ + w6 = (s8)w0; \ + r0 = (s8)r0; \ + if w6 >= 0xf0000000 goto l0_%=; \ + r0 += r6; \ + r6 += 400; \ + r0 -= r6; \ + if r3 < r0 goto l0_%=; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges + * overlap on the positive side. 
At instruction 3, the ranges look as follows: + * + * 0 umin=0 umax=0xffffffffffffff00 U64_MAX + * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxx] [xxxxxxxx| + * 0 smax=127 smin=-128 -1 + * + * We should therefore deduce the following new bounds: + * + * 0 u64=[0;127] U64_MAX + * [xxxxxxxx] | + * |----------------------------|------------------------------| + * [xxxxxxxx] | + * 0 s64=[0;127] -1 + * + * Without the deduction cross sign boundary, the program is rejected due to + * the frame pointer write. + */ +SEC("socket") +__description("bounds deduction cross sign boundary, positive overlap") +__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))") +__retval(0) +__naked void bounds_deduct_positive_overlap(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 = (s8)r0; \ + r1 = 0xffffffffffffff00; \ + if r0 > r1 goto l0_%=; \ + if r0 < 128 goto l0_%=; \ + r10 = 0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test is the same as above, but the s64 and u64 ranges overlap in two + * places. At instruction 3, the ranges look as follows: + * + * 0 umin=0 umax=0xffffffffffffff80 U64_MAX + * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxx] [xxxxxxxx| + * 0 smax=127 smin=-128 -1 + * + * 0xffffffffffffff80 = (u64)-128. We therefore can't deduce anything new and + * the program should fail due to the frame pointer write. 
+ */ +SEC("socket") +__description("bounds deduction cross sign boundary, two overlaps") +__failure __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)") +__msg("frame pointer is read only") +__naked void bounds_deduct_two_overlaps(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 = (s8)r0; \ + r1 = 0xffffffffffffff80; \ + if r0 > r1 goto l0_%=; \ + if r0 < 128 goto l0_%=; \ + r10 = 0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c index c506afbdd936..260a6df264e3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c @@ -159,13 +159,16 @@ __failure_unpriv __naked void deducing_bounds_from_const_10(void) { asm volatile (" \ + r6 = r1; \ r0 = 0; \ if r0 s<= 0 goto l0_%=; \ -l0_%=: /* Marks reg as unknown. */ \ - r0 = -r0; \ - r0 -= r1; \ +l0_%=: /* Marks r0 as unknown. 
*/ \ + call %[bpf_get_prandom_u32]; \ + r0 -= r6; \ exit; \ -" ::: __clobber_all); +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c index a83809a1dbbf..0450840c92d9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c @@ -218,4 +218,29 @@ __naked void null_check_8_null_bind(void) : __clobber_all); } +#define narrow_load(type, ctx, field) \ + SEC(type) \ + __description("narrow load on field " #field " of " #ctx) \ + __failure __msg("invalid bpf_context access") \ + __naked void invalid_narrow_load##ctx##field(void) \ + { \ + asm volatile (" \ + r1 = *(u32 *)(r1 + %[off]); \ + r0 = 0; \ + exit;" \ + : \ + : __imm_const(off, offsetof(struct ctx, field) + 4) \ + : __clobber_all); \ + } + +narrow_load("cgroup/getsockopt", bpf_sockopt, sk); +narrow_load("cgroup/getsockopt", bpf_sockopt, optval); +narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end); +narrow_load("tc", __sk_buff, sk); +narrow_load("cgroup/bind4", bpf_sock_addr, sk); +narrow_load("sockops", bpf_sock_ops, sk); +narrow_load("sockops", bpf_sock_ops, skb_data); +narrow_load("sockops", bpf_sock_ops, skb_data_end); +narrow_load("sockops", bpf_sock_ops, skb_hwtstamp); + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c index 458984da804c..34e0c012ee76 100644 --- a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c +++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c @@ -77,7 +77,7 @@ l0_%=: exit; \ SEC("tc") __description("MOD32 overflow, check 1") -__success __retval(INT_MIN) +__success __retval(_INT_MIN) __naked void mod32_overflow_check_1(void) { asm volatile (" \ @@ -92,7 +92,7 @@ __naked void mod32_overflow_check_1(void) SEC("tc") 
__description("MOD32 overflow, check 2") -__success __retval(INT_MIN) +__success __retval(_INT_MIN) __naked void mod32_overflow_check_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 4ab0ef18d7eb..181da86ba5f0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -179,4 +179,132 @@ int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags) return subprog_trusted_acq_rel(task); } +__weak int subprog_untrusted_bad_tags(struct task_struct *task __arg_untrusted __arg_nullable) +{ + return task->pid; +} + +SEC("tp_btf/sys_enter") +__failure +__msg("arg#0 untrusted cannot be combined with any other tags") +int untrusted_bad_tags(void *ctx) +{ + return subprog_untrusted_bad_tags(0); +} + +struct local_type_wont_be_accepted {}; + +__weak int subprog_untrusted_bad_type(struct local_type_wont_be_accepted *p __arg_untrusted) +{ + return 0; +} + +SEC("tp_btf/sys_enter") +__failure +__msg("arg#0 reference type('STRUCT local_type_wont_be_accepted') has no matches") +int untrusted_bad_type(void *ctx) +{ + return subprog_untrusted_bad_type(bpf_rdonly_cast(0, 0)); +} + +__weak int subprog_untrusted(const volatile struct task_struct *restrict task __arg_untrusted) +{ + return task->pid; +} + +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("Func#1 ('subprog_untrusted') is global and assumed valid.") +__msg("Validating subprog_untrusted() func#1...") +__msg(": R1=untrusted_ptr_task_struct") +int trusted_to_untrusted(void *ctx) +{ + return subprog_untrusted(bpf_get_current_task_btf()); +} + +char mem[16]; +u32 off; + +SEC("tp_btf/sys_enter") +__success +int anything_to_untrusted(void *ctx) +{ + /* untrusted to untrusted */ + subprog_untrusted(bpf_core_cast(0, struct task_struct)); + /* wrong 
type to untrusted */ + subprog_untrusted((void *)bpf_core_cast(0, struct bpf_verifier_env)); + /* map value to untrusted */ + subprog_untrusted((void *)mem); + /* scalar to untrusted */ + subprog_untrusted(0); + /* variable offset to untrusted (map) */ + subprog_untrusted((void *)mem + off); + /* variable offset to untrusted (trusted) */ + subprog_untrusted((void *)bpf_get_current_task_btf() + off); + return 0; +} + +__weak int subprog_untrusted2(struct task_struct *task __arg_untrusted) +{ + return subprog_trusted_task_nullable(task); +} + +SEC("tp_btf/sys_enter") +__failure +__msg("R1 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_") +__msg("Caller passes invalid args into func#{{.*}} ('subprog_trusted_task_nullable')") +int untrusted_to_trusted(void *ctx) +{ + return subprog_untrusted2(bpf_get_current_task_btf()); +} + +__weak int subprog_void_untrusted(void *p __arg_untrusted) +{ + return *(int *)p; +} + +__weak int subprog_char_untrusted(char *p __arg_untrusted) +{ + return *(int *)p; +} + +__weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted) +{ + return *(int *)p; +} + +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.") +__msg("Validating subprog_void_untrusted() func#1...") +__msg(": R1=rdonly_untrusted_mem(sz=0)") +int trusted_to_untrusted_mem(void *ctx) +{ + return subprog_void_untrusted(bpf_get_current_task_btf()); +} + +SEC("tp_btf/sys_enter") +__success +int anything_to_untrusted_mem(void *ctx) +{ + /* untrusted to untrusted mem */ + subprog_void_untrusted(bpf_core_cast(0, struct task_struct)); + /* map value to untrusted mem */ + subprog_void_untrusted(mem); + /* scalar to untrusted mem */ + subprog_void_untrusted(0); + /* variable offset to untrusted mem (map) */ + subprog_void_untrusted((void *)mem + off); + /* variable offset to untrusted mem (trusted) */ + 
subprog_void_untrusted(bpf_get_current_task_btf() + off); + /* variable offset to untrusted char/enum (map) */ + subprog_char_untrusted(mem + off); + subprog_enum_untrusted((void *)mem + off); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c index 7d088ba99ea5..16b761e510f0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c @@ -139,4 +139,122 @@ __naked void on_the_inner_map_pointer(void) : __clobber_all); } +SEC("socket") +__description("map_ptr is never null") +__success +__naked void map_ptr_is_never_null(void) +{ + asm volatile (" \ + r0 = 0; \ + r1 = %[map_in_map] ll; \ + if r1 != 0 goto l0_%=; \ + r10 = 42; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +SEC("socket") +__description("map_ptr is never null inner") +__success +__naked void map_ptr_is_never_null_inner(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 = %[map_in_map] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + if r0 != 0 goto l0_%=; \ + r10 = 42; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +SEC("socket") +__description("map_ptr is never null inner spill fill") +__success +__naked void map_ptr_is_never_null_inner_spill_fill(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 = %[map_in_map] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 != 0 goto l0_%=; \ + exit; \ +l0_%=: *(u64 *)(r10 -16) = r0; \ + r1 = *(u64 *)(r10 -16); \ + if r1 == 0 goto l1_%=; \ + exit; \ +l1_%=: r10 = 42; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + 
__uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64 * 1024); + }); +} rb_in_map SEC(".maps"); + +struct rb_ctx { + void *rb; + struct bpf_dynptr dptr; +}; + +static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz) +{ + struct rb_ctx rb_ctx = {}; + void *rb; + __u32 cpu = bpf_get_smp_processor_id(); + __u32 rb_slot = cpu & 1; + + rb = bpf_map_lookup_elem(&rb_in_map, &rb_slot); + if (!rb) + return rb_ctx; + + rb_ctx.rb = rb; + bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr); + + return rb_ctx; +} + +static __noinline void __rb_event_submit(struct rb_ctx *ctx) +{ + if (!ctx->rb) + return; + + /* If the verifier (incorrectly) concludes that ctx->rb can be + * NULL at this point, we'll get "BPF_EXIT instruction in main + * prog would lead to reference leak" error + */ + bpf_ringbuf_submit_dynptr(&ctx->dptr, 0); +} + +SEC("socket") +int map_ptr_is_never_null_rb(void *ctx) +{ + struct rb_ctx event_ctx = __rb_event_reserve(256); + __rb_event_submit(&event_ctx); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index 994bbc346d25..a4d8814eb5ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -245,7 +245,13 @@ l0_%=: \ SEC("socket") __description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") __success __retval(0) -__failure_unpriv __msg_unpriv("frame pointer is read only") +__success_unpriv +#ifdef SPEC_V1 +__xlated_unpriv("w0 = 0") +__xlated_unpriv("exit") +__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */ +__xlated_unpriv("goto pc-1") +#endif __naked void mov64sx_s32_varoff_2(void) { asm volatile (" \ @@ -267,7 +273,13 @@ l0_%=: \ SEC("socket") __description("MOV32SX, S8, var_off not u32_max, negative after s8 
extension") __success __retval(0) -__failure_unpriv __msg_unpriv("frame pointer is read only") +__success_unpriv +#ifdef SPEC_V1 +__xlated_unpriv("w0 = 0") +__xlated_unpriv("exit") +__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */ +__xlated_unpriv("goto pc-1") +#endif __naked void mov64sx_s32_varoff_3(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 9fe5d255ee37..73fee2aec698 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -231,4 +231,74 @@ __naked void bpf_cond_op_not_r10(void) ::: __clobber_all); } +SEC("lsm.s/socket_connect") +__success __log_level(2) +__msg("0: (b7) r0 = 1 ; R0_w=1") +__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff") +__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0") +__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") +__naked int bpf_neg_2(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -1 is allowed + */ + asm volatile ( + "r0 = 1;" + "w0 = -w0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__failure __msg("At program exit the register R0 has") +__naked int bpf_neg_3(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -10000 is not allowed. 
+ */ + asm volatile ( + "r0 = 10000;" + "w0 = -w0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__success __log_level(2) +__msg("0: (b7) r0 = 1 ; R0_w=1") +__msg("1: (87) r0 = -r0 ; R0_w=-1") +__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0") +__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") +__naked int bpf_neg_4(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -1 is allowed + */ + asm volatile ( + "r0 = 1;" + "r0 = -r0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__failure __msg("At program exit the register R0 has") +__naked int bpf_neg_5(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -10000 is not allowed. + */ + asm volatile ( + "r0 = 10000;" + "r0 = -r0;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c index fc91b414364e..1ecd34ebde19 100644 --- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -8,7 +8,7 @@ /* From include/linux/filter.h */ #define MAX_BPF_STACK 512 -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) struct elem { struct bpf_timer t; @@ -30,6 +30,18 @@ __jited(" movabsq $0x{{.*}}, %r9") __jited(" addq %gs:{{.*}}, %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x100(%r9)") +__arch_arm64 +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited("...") +__jited(" 
ldp x25, x27, [sp], {{.*}}") __naked void private_stack_single_prog(void) { asm volatile (" \ @@ -45,6 +57,9 @@ __description("No private stack") __success __arch_x86_64 __jited(" subq $0x8, %rsp") +__arch_arm64 +__jited(" mov x25, sp") +__jited(" sub sp, sp, #0x10") __naked void no_private_stack_nested(void) { asm volatile (" \ @@ -81,6 +96,19 @@ __jited(" pushq %r9") __jited(" callq 0x{{.*}}") __jited(" popq %r9") __jited(" xorl %eax, %eax") +__arch_arm64 +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" bl {{.*}}") +__jited("...") +__jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_nested_1(void) { asm volatile (" \ @@ -131,6 +159,24 @@ __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited("func #1") +__jited("...") +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" mov x7, #0x0") +__jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_callback(void) { asm volatile (" \ @@ -154,6 +200,28 @@ __arch_x86_64 __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited(" stp x29, x30, [sp, #-0x10]!") +__jited(" mov x29, sp") +__jited(" stp xzr, x26, [sp, #-0x10]!") +__jited(" mov x26, sp") +__jited(" stp x19, x20, [sp, #-0x10]!") +__jited(" stp x21, x22, [sp, #-0x10]!") +__jited(" stp x23, x24, [sp, #-0x10]!") +__jited(" stp x25, x26, [sp, 
#-0x10]!") +__jited(" stp x27, x28, [sp, #-0x10]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" mov x0, #0x0") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" ldp x27, x28, [sp], #0x10") int private_stack_exception_main_prog(void) { asm volatile (" \ @@ -179,6 +247,19 @@ __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited(" stp x27, x28, [sp, #-0x10]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" ldp x27, x28, [sp], #0x10") int private_stack_exception_sub_prog(void) { asm volatile (" \ @@ -220,6 +301,10 @@ __description("Private stack, async callback, not nested") __success __retval(0) __arch_x86_64 __jited(" movabsq $0x{{.*}}, %r9") +__arch_arm64 +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") int private_stack_async_callback_1(void) { struct bpf_timer *arr_timer; @@ -241,6 +326,8 @@ __description("Private stack, async callback, potential nesting") __success __retval(0) __arch_x86_64 __jited(" subq $0x100, %rsp") +__arch_arm64 +__jited(" sub sp, sp, #0x100") int private_stack_async_callback_2(void) { struct bpf_timer *arr_timer; diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c index 683a882b3e6d..910365201f68 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c +++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c @@ -27,7 
+27,7 @@ struct bpf_key {} __attribute__((preserve_access_index)); extern void bpf_key_put(struct bpf_key *key) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; /* BTF FUNC records are not generated for kfuncs referenced * from inline assembly. These records are necessary for diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall.c b/tools/testing/selftests/bpf/progs/verifier_tailcall.c new file mode 100644 index 000000000000..b4acce60fb9b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_tailcall.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} map_array SEC(".maps"); + +SEC("socket") +__description("invalid map type for tail call") +__failure __msg("expected prog array map for tail call") +__failure_unpriv +__naked void invalid_map_for_tail_call(void) +{ + asm volatile (" \ + r2 = %[map_array] ll; \ + r3 = 0; \ + call %[bpf_tail_call]; \ + exit; \ +" : + : __imm(bpf_tail_call), + __imm_addr(map_array) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index a4a5e2071604..28b4f7035ceb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -572,8 +572,14 @@ l0_%=: exit; \ SEC("socket") __description("alu32: mov u32 const") -__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r0 == 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem 
access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void alu32_mov_u32_const(void) { asm volatile (" \ @@ -619,12 +625,11 @@ __naked void pass_pointer_to_tail_call(void) SEC("socket") __description("unpriv: cmp map pointer with zero") -__success __failure_unpriv __msg_unpriv("R1 pointer comparison") +__success __success_unpriv __retval(0) __naked void cmp_map_pointer_with_zero(void) { asm volatile (" \ - r1 = 0; \ r1 = %[map_hash_8b] ll; \ if r1 == 0 goto l0_%=; \ l0_%=: r0 = 0; \ @@ -635,6 +640,22 @@ l0_%=: r0 = 0; \ } SEC("socket") +__description("unpriv: cmp map pointer with const") +__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited") +__retval(0) +__naked void cmp_map_pointer_with_const(void) +{ + asm volatile (" \ + r1 = %[map_hash_8b] ll; \ + if r1 == 0x0000beef goto l0_%=; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") __description("unpriv: write into frame pointer") __failure __msg("frame pointer is read only") __failure_unpriv @@ -723,4 +744,210 @@ l0_%=: r0 = 0; \ " ::: __clobber_all); } +SEC("socket") +__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r0 != 0x1 goto pc+2") +/* This nospec prevents the exploit because it forces the mispredicted (not + * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer. + * This causes the CPU to realize that `r6 = r9` should have never executed. It + * ensures that r6 always contains a readable stack slot ptr when the insn after + * the nospec executes. 
+ */ +__xlated_unpriv("nospec") +__xlated_unpriv("r9 = *(u8 *)(r6 +0)") +#endif +__naked void unpriv_spec_v1_type_confusion(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l2_%=; \ + /* r0: pointer to a map array entry */ \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + /* r1, r2: prepared call args */ \ + r6 = r10; \ + r6 += -8; \ + /* r6: pointer to readable stack slot */ \ + r9 = 0xffffc900; \ + r9 <<= 32; \ + /* r9: scalar controlled by attacker */ \ + r0 = *(u64 *)(r0 + 0); /* cache miss */ \ + if r0 != 0x0 goto l0_%=; \ + r6 = r9; \ +l0_%=: if r0 != 0x1 goto l1_%=; \ + r9 = *(u8 *)(r6 + 0); \ +l1_%=: /* leak r9 */ \ + r9 &= 1; \ + r9 <<= 9; \ + *(u64*)(r10 - 8) = r9; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l2_%=; \ + /* leak secret into is_cached(map[0|512]): */ \ + r0 = *(u64 *)(r0 + 0); \ +l2_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("unpriv: ldimm64 before Spectre v4 barrier") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V4 +__xlated_unpriv("r1 = 0x2020200005642020") /* should not matter */ +__xlated_unpriv("*(u64 *)(r10 -8) = r1") +__xlated_unpriv("nospec") +#endif +__naked void unpriv_ldimm64_spectre_v4(void) +{ + asm volatile (" \ + r1 = 0x2020200005642020 ll; \ + *(u64 *)(r10 -8) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unpriv: Spectre v1 and v4 barrier") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef SPEC_V4 +/* starts with r0 == r8 == r9 == 0 */ +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+2") +__xlated_unpriv("if r9 == 0x0 goto pc+4") +__xlated_unpriv("r2 = r0") +/* Following nospec required to prevent following dangerous `*(u64 *)(NOT_FP -64) + * = r1` iff `if r9 == 0 goto pc+4` was mispredicted 
because of Spectre v1. The + * test therefore ensures the Spectre-v4--induced nospec does not prevent the + * Spectre-v1--induced speculative path from being fully analyzed. + */ +__xlated_unpriv("nospec") /* Spectre v1 */ +__xlated_unpriv("*(u64 *)(r2 -64) = r1") /* could be used to leak r2 */ +__xlated_unpriv("nospec") /* Spectre v4 */ +#endif +#endif +__naked void unpriv_spectre_v1_and_v4(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r8 = r0; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r9 = r0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("unpriv: Spectre v1 and v4 barrier (simple)") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef SPEC_V4 +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+2") +__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */ +__xlated_unpriv("goto pc-1") /* r2 = r0 */ +__xlated_unpriv("nospec") +__xlated_unpriv("*(u64 *)(r2 -64) = r1") +__xlated_unpriv("nospec") +#endif +#endif +__naked void unpriv_spectre_v1_and_v4_simple(void) +{ + asm volatile (" \ + r8 = 0; \ + r9 = 0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unpriv: ldimm64 before Spectre v1 and v4 barrier (simple)") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef 
SPEC_V4 +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+4") +__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */ +__xlated_unpriv("goto pc-1") /* r2 = r0 */ +__xlated_unpriv("goto pc-1") /* r1 = 0x2020200005642020 ll */ +__xlated_unpriv("goto pc-1") /* second part of ldimm64 */ +__xlated_unpriv("nospec") +__xlated_unpriv("*(u64 *)(r2 -64) = r1") +__xlated_unpriv("nospec") +#endif +#endif +__naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void) +{ + asm volatile (" \ + r8 = 0; \ + r9 = 0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ + r1 = 0x2020200005642020 ll; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c index 5ba6e53571c8..af7938ce56cb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c @@ -231,6 +231,10 @@ __retval(1) __naked void ptr_unknown_vs_unknown_lt(void) { asm volatile (" \ + r8 = r1; \ + call %[bpf_get_prandom_u32]; \ + r9 = r0; \ + r1 = r8; \ r0 = *(u32*)(r1 + %[__sk_buff_len]); \ r1 = 0; \ *(u64*)(r10 - 8) = r1; \ @@ -245,11 +249,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \ r4 = *(u8*)(r0 + 0); \ if r4 == 1 goto l3_%=; \ r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x3; \ goto l4_%=; \ l3_%=: r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x7; \ l4_%=: r1 += r0; \ r0 = *(u8*)(r1 + 0); \ @@ -259,7 +263,8 @@ l2_%=: r0 = 1; \ : __imm(bpf_map_lookup_elem), __imm_addr(map_array_48b), __imm_addr(map_hash_16b), - __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)) + __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)), + __imm(bpf_get_prandom_u32) : 
__clobber_all); } @@ -271,6 +276,10 @@ __retval(1) __naked void ptr_unknown_vs_unknown_gt(void) { asm volatile (" \ + r8 = r1; \ + call %[bpf_get_prandom_u32]; \ + r9 = r0; \ + r1 = r8; \ r0 = *(u32*)(r1 + %[__sk_buff_len]); \ r1 = 0; \ *(u64*)(r10 - 8) = r1; \ @@ -285,11 +294,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \ r4 = *(u8*)(r0 + 0); \ if r4 == 1 goto l3_%=; \ r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x7; \ goto l4_%=; \ l3_%=: r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x3; \ l4_%=: r1 += r0; \ r0 = *(u8*)(r1 + 0); \ @@ -299,7 +308,8 @@ l2_%=: r0 = 1; \ : __imm(bpf_map_lookup_elem), __imm_addr(map_array_48b), __imm_addr(map_hash_16b), - __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)) + __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)), + __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -398,7 +408,8 @@ l2_%=: r0 = 1; \ SEC("socket") __description("map access: mixing value pointer and scalar, 1") -__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited") +__success __failure_unpriv +__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root") __retval(0) __naked void value_pointer_and_scalar_1(void) { @@ -433,6 +444,7 @@ l2_%=: /* common instruction */ \ l3_%=: /* branch B */ \ r0 = 0x13371337; \ /* verifier follows fall-through */ \ + /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\ if r2 != 0x100000 goto l4_%=; \ r0 = 0; \ exit; \ @@ -450,7 +462,8 @@ l4_%=: /* fake-dead code; targeted from branch A to \ SEC("socket") __description("map access: mixing value pointer and scalar, 2") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __failure_unpriv +__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root") __retval(0) __naked void value_pointer_and_scalar_2(void) { @@ -492,6 +505,7 @@ l4_%=: /* fake-dead code; targeted from branch A to 
\ * prevent dead code sanitization, rejected \ * via branch B however \ */ \ + /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\ r0 = *(u8*)(r0 + 0); \ r0 = 0; \ exit; \ @@ -1296,9 +1310,13 @@ l0_%=: r0 = 1; \ SEC("socket") __description("map access: value_ptr -= unknown scalar, 2") -__success __failure_unpriv -__msg_unpriv("R0 pointer arithmetic of map value goes out of range") +__success __success_unpriv __retval(1) +#ifdef SPEC_V1 +__xlated_unpriv("r1 &= 7") +__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */ +__xlated_unpriv("r0 -= r1") +#endif __naked void value_ptr_unknown_scalar_2_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh new file mode 100755 index 000000000000..515b1df0501e --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_map.sh @@ -0,0 +1,398 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTNAME="bpftool_map" +BPF_FILE="security_bpf_map.bpf.o" +BPF_ITER_FILE="bpf_iter_map_elem.bpf.o" +PROTECTED_MAP_NAME="prot_map" +NOT_PROTECTED_MAP_NAME="not_prot_map" +BPF_FS_TMP_PARENT="/tmp" +BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) +BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT} +# bpftool will mount bpf file system under BPF_DIR if it is not mounted +# under BPF_FS_PARENT. +BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME" +SCRIPT_DIR=$(dirname $(realpath "$0")) +BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE" +BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE" +BPFTOOL_PATH="bpftool" +# Assume the script is located under tools/testing/selftests/bpf/ +KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../) + +_cleanup() +{ + set +eu + + # If BPF_DIR is a mount point this will not remove the mount point itself. 
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null + + # Unmount if BPF filesystem was temporarily created. + if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then + # A loop and recursive unmount are required as bpftool might + # create multiple mounts. For example, a bind mount of the directory + # to itself. The bind mount is created to change mount propagation + # flags on an actual mount point. + max_attempts=3 + attempt=0 + while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do + umount -R "$BPF_DIR" 2>/dev/null + attempt=$((attempt+1)) + done + + # The directory still exists. Remove it now. + [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null + fi +} + +cleanup_skip() +{ + echo "selftests: $TESTNAME [SKIP]" + _cleanup + + exit $ksft_skip +} + +cleanup() +{ + if [ "$?" = 0 ]; then + echo "selftests: $TESTNAME [PASS]" + else + echo "selftests: $TESTNAME [FAILED]" + fi + _cleanup +} + +check_root_privileges() { + if [ $(id -u) -ne 0 ]; then + echo "Need root privileges" + exit $ksft_skip + fi +} + +# Function to verify bpftool path. +# Parameters: +# $1: bpftool path +verify_bpftool_path() { + local bpftool_path="$1" + if ! "$bpftool_path" version > /dev/null 2>&1; then + echo "Could not run test without bpftool" + exit $ksft_skip + fi +} + +# Function to verify BTF support. +# The test requires BTF support for fmod_ret programs. +verify_btf_support() { + if [ ! -f /sys/kernel/btf/vmlinux ]; then + echo "Could not run test without BTF support" + exit $ksft_skip + fi +} + +# Function to initialize map entries with keys [0..2] and values set to 0. +# Parameters: +# $1: Map name +# $2: bpftool path +initialize_map_entries() { + local map_name="$1" + local bpftool_path="$2" + + for key in 0 1 2; do + "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key + done +} + +# Test read access to the map. 
+# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +access_for_read() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + + # Test read access to the map. + if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + echo " Read access to $key in $map_name failed" + exit 1 + fi + + # Test read access to map's BTF data. + if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then + echo " Read access to $map_name for BTF data failed" + exit 1 + fi +} + +# Test write access to the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +# $5: Whether write should succeed (true/false) +access_for_write() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + local write_should_succeed="$5" + local value="1 1 1 1" + + if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \ + $value 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Write access to $key in $map_name succeeded but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Write access to $key in $map_name failed but should have succeeded" + exit 1 + fi + fi +} + +# Test entry deletion for the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +# $5: Whether write should succeed (true/false) +access_for_deletion() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + local write_should_succeed="$5" + local value="1 1 1 1" + + # Test deletion by key for the map. + # Before deleting, check the key exists. + if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + echo " Key $key does not exist in $map_name" + exit 1 + fi + + # Delete by key. 
+ if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Deletion for $key in $map_name succeeded but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Deletion for $key in $map_name failed but should have succeeded" + exit 1 + fi + fi + + # After deleting, check the entry existence according to the expected status. + if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + if [ "$write_should_succeed" = "true" ]; then + echo " Key $key for $map_name was not deleted but should have been deleted" + exit 1 + fi + else + if [ "$write_should_succeed" = "false" ]; then + echo "Key $key for $map_name was deleted but should have not been deleted" + exit 1 + fi + fi + + # Test creation of map's deleted entry, if deletion was successful. + # Otherwise, the entry exists. + if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \ + $value 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded" + exit 1 + fi + fi +} + +# Test map elements iterator. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: BPF_DIR +# $5: bpf iterator object file path +iterate_map_elem() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local bpf_dir="$4" + local bpf_file="$5" + local pin_path="$bpf_dir/map_iterator" + + "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name" + if [ ! 
-f "$pin_path" ]; then + echo " Failed to pin iterator to $pin_path" + exit 1 + fi + + cat "$pin_path" 1>/dev/null + rm "$pin_path" 2>/dev/null +} + +# Function to test map access with configurable write expectations +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key for rw +# $5: key to delete +# $6: Whether write should succeed (true/false) +# $7: BPF_DIR +# $8: bpf iterator object file path +access_map() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key_for_rw="$4" + local key_to_del="$5" + local write_should_succeed="$6" + local bpf_dir="$7" + local bpf_iter_file_path="$8" + + access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" + access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \ + "$write_should_succeed" + access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \ + "$write_should_succeed" + iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \ + "$bpf_iter_file_path" +} + +# Function to test map access with configurable write expectations +# Parameters: +# $1: Map name +# $2: bpftool path +# $3: BPF_DIR +# $4: Whether write should succeed (true/false) +# $5: bpf iterator object file path +test_map_access() { + local map_name="$1" + local bpftool_path="$2" + local bpf_dir="$3" + local pin_path="$bpf_dir/${map_name}_pinned" + local write_should_succeed="$4" + local bpf_iter_file_path="$5" + + # Test access to the map by name. + access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \ + "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path" + + # Pin the map to the BPF filesystem + "$bpftool_path" map pin name "$map_name" "$pin_path" + if [ ! -e "$pin_path" ]; then + echo " Failed to pin $map_name" + exit 1 + fi + + # Test access to the pinned map. 
+ access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \ + "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path" +} + +# Function to test map creation and map-of-maps +# Parameters: +# $1: bpftool path +# $2: BPF_DIR +test_map_creation_and_map_of_maps() { + local bpftool_path="$1" + local bpf_dir="$2" + local outer_map_name="outer_map_tt" + local inner_map_name="inner_map_tt" + + "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \ + value 4 entries 4 name "$inner_map_name" + if [ ! -f "$bpf_dir/$inner_map_name" ]; then + echo " Failed to create inner map file at $bpf_dir/$outer_map_name" + return 1 + fi + + "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \ + key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name" + if [ ! -f "$bpf_dir/$outer_map_name" ]; then + echo " Failed to create outer map file at $bpf_dir/$outer_map_name" + return 1 + fi + + # Add entries to the outer map by name and by pinned path. + "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \ + value pinned "$bpf_dir/$inner_map_name" + "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \ + name "$inner_map_name" + + # The outer map should be full by now. + # The following map update command is expected to fail. + if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \ + "$inner_map_name" 2>/dev/null; then + echo " Update for $outer_map_name succeeded but should have failed" + exit 1 + fi +} + +# Function to test map access with the btf list command +# Parameters: +# $1: bpftool path +test_map_access_with_btf_list() { + local bpftool_path="$1" + + # The btf list command iterates over maps for + # loaded BPF programs. + if ! 
"$bpftool_path" btf list 1>/dev/null; then + echo " Failed to access btf data" + exit 1 + fi +} + +set -eu + +trap cleanup_skip EXIT + +check_root_privileges + +verify_bpftool_path "$BPFTOOL_PATH" + +verify_btf_support + +trap cleanup EXIT + +# Load and attach the BPF programs to control maps access. +"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach + +initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" +initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" + +# Activate the map protection mechanism. Protection status is controlled +# by a value stored in the prot_status_map at index 0. +"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0 + +# Test protected map (write should fail). +test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \ + "$BPF_ITER_FILE_PATH" + +# Test not protected map (write should succeed). +test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \ + "$BPF_ITER_FILE_PATH" + +test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR" + +test_map_access_with_btf_list "$BPFTOOL_PATH" + +exit 0 diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 9551d8d5f8f9..78423cf89e01 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -40,7 +40,7 @@ #define TEST_TAG_LOAD_MODE_PFX "comment:load_mode=" /* Warning: duplicated in bpf_misc.h */ -#define POINTER_VALUE 0xcafe4all +#define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -318,20 +318,14 @@ static int parse_caps(const char *str, __u64 *val, const char *name) static int parse_retval(const char *str, int *val, const char *name) { - struct { - char *name; - int val; - } named_values[] = { - { "INT_MIN" , INT_MIN }, - { "POINTER_VALUE", POINTER_VALUE }, - { "TEST_DATA_LEN", TEST_DATA_LEN }, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(named_values); 
++i) { - if (strcmp(str, named_values[i].name) != 0) - continue; - *val = named_values[i].val; + /* + * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a + * single int and cannot be parsed with strtol, so we handle it + * separately here. In addition, it expands to different expressions in + * different compilers so we use a prefixed _INT_MIN instead. + */ + if (strcmp(str, "_INT_MIN") == 0) { + *val = INT_MIN; return 0; } @@ -1103,9 +1097,9 @@ void run_subtest(struct test_loader *tester, } } - do_prog_test_run(bpf_program__fd(tprog), &retval, - bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); - if (retval != subspec->retval && subspec->retval != POINTER_VALUE) { + err = do_prog_test_run(bpf_program__fd(tprog), &retval, + bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); + if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) { PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 986ce32b113a..3fae9ce46ca9 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -535,7 +535,7 @@ static void test_devmap_hash(unsigned int task, void *data) static void test_queuemap(unsigned int task, void *data) { const int MAP_SIZE = 32; - __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0; int fd, i; /* Fill test values to be used */ @@ -591,7 +591,7 @@ static void test_queuemap(unsigned int task, void *data) static void test_stackmap(unsigned int task, void *data) { const int MAP_SIZE = 32; - __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0; int fd, i; /* Fill test values to be used */ diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 870694f2a359..df2222a1806f 100644 --- 
a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -460,6 +460,34 @@ static inline void *u64_to_ptr(__u64 ptr) return (void *) (unsigned long) ptr; } +static inline __u32 id_from_prog_fd(int fd) +{ + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); + int err; + + err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); + if (!ASSERT_OK(err, "id_from_prog_fd")) + return 0; + + ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); + return prog_info.id; +} + +static inline __u32 id_from_link_fd(int fd) +{ + struct bpf_link_info link_info = {}; + __u32 link_info_len = sizeof(link_info); + int err; + + err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); + if (!ASSERT_OK(err, "id_from_link_fd")) + return 0; + + ASSERT_NEQ(link_info.id, 0, "link_info.id"); + return link_info.id; +} + int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c index 220f6a963813..f997d7ec8fd0 100644 --- a/tools/testing/selftests/bpf/unpriv_helpers.c +++ b/tools/testing/selftests/bpf/unpriv_helpers.c @@ -1,15 +1,76 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <limits.h> #include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <string.h> +#include <sys/utsname.h> #include <unistd.h> #include <fcntl.h> +#include <zlib.h> #include "unpriv_helpers.h" -static bool get_mitigations_off(void) +static gzFile open_config(void) +{ + struct utsname uts; + char buf[PATH_MAX]; + gzFile config; + + if (uname(&uts)) { + perror("uname"); + goto config_gz; + } + + snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release); + config = gzopen(buf, "rb"); + if (config) + return config; + fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno)); + 
+config_gz: + config = gzopen("/proc/config.gz", "rb"); + if (!config) + perror("gzopen /proc/config.gz"); + return config; +} + +static int config_contains(const char *pat) +{ + const char *msg; + char buf[1024]; + gzFile config; + int n, err; + + config = open_config(); + if (!config) + return -1; + + for (;;) { + if (!gzgets(config, buf, sizeof(buf))) { + msg = gzerror(config, &err); + if (err == Z_ERRNO) + perror("gzgets /proc/config.gz"); + else if (err != Z_OK) + fprintf(stderr, "gzgets /proc/config.gz: %s", msg); + gzclose(config); + return -1; + } + n = strlen(buf); + if (buf[n - 1] == '\n') + buf[n - 1] = 0; + if (strcmp(buf, pat) == 0) { + gzclose(config); + return 1; + } + } + gzclose(config); + return 0; +} + +static bool cmdline_contains(const char *pat) { char cmdline[4096], *c; int fd, ret = false; @@ -27,7 +88,7 @@ static bool get_mitigations_off(void) cmdline[sizeof(cmdline) - 1] = '\0'; for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) { - if (strncmp(c, "mitigations=off", strlen(c))) + if (strncmp(c, pat, strlen(c))) continue; ret = true; break; @@ -37,8 +98,21 @@ out: return ret; } +static int get_mitigations_off(void) +{ + int enabled_in_config; + + if (cmdline_contains("mitigations=off")) + return 1; + enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y"); + if (enabled_in_config < 0) + return -1; + return !enabled_in_config; +} + bool get_unpriv_disabled(void) { + int mitigations_off; bool disabled; char buf[2]; FILE *fd; @@ -52,5 +126,19 @@ bool get_unpriv_disabled(void) disabled = true; } - return disabled ? true : get_mitigations_off(); + if (disabled) + return true; + + /* + * Some unpriv tests rely on spectre mitigations being on. + * If mitigations are off or status can't be determined + * assume that unpriv tests are disabled. 
+ */ + mitigations_off = get_mitigations_off(); + if (mitigations_off < 0) { + fprintf(stderr, + "Can't determine if mitigations are enabled, disabling unpriv tests."); + return true; + } + return mitigations_off; } diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 18596ae0b0c1..f3492efc8834 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -2409,3 +2409,27 @@ .errstr_unpriv = "", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, +{ + "calls: several args with ref_obj_id", + .insns = { + /* Reserve at least sizeof(struct iphdr) bytes in the ring buffer. + * With a smaller size, the verifier would reject the call to + * bpf_tcp_raw_gen_syncookie_ipv4 before we can reach the + * ref_obj_id error. + */ + BPF_MOV64_IMM(BPF_REG_2, 20), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* if r0 == 0 goto <exit> */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tcp_raw_gen_syncookie_ipv4), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 2 }, + .result = REJECT, + .errstr = "more than one arg with ref_obj_id", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index ee454327e5c6..77207b498c6f 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -2,14 +2,13 @@ "dead code: start", .insns = { BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_JMP_IMM(BPF_JA, 0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 7), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - 
.result_unpriv = REJECT, .result = ACCEPT, .retval = 7, }, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index 43776f6f92f4..91d83e9cb148 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -84,11 +84,10 @@ BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -149,11 +148,10 @@ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -214,11 +212,10 @@ BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -283,11 +280,10 @@ BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -354,11 +350,10 @@ BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, 
BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -425,11 +420,10 @@ BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -496,11 +490,10 @@ BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -567,11 +560,10 @@ BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -638,11 +630,10 @@ BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -709,11 +700,10 @@ BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, 
BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -780,11 +770,10 @@ BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c index 11fc68da735e..e901eefd774a 100644 --- a/tools/testing/selftests/bpf/verifier/jset.c +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -78,12 +78,11 @@ .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .retval = 1, .result = ACCEPT, }, @@ -136,13 +135,12 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -154,16 +152,16 @@ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), BPF_JMP_IMM(BPF_JLT, 
BPF_REG_1, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index b2bb20b00952..d532dd82a3a8 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -23,6 +23,7 @@ #include <float.h> #include <math.h> #include <limits.h> +#include <assert.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -49,6 +50,7 @@ enum stat_id { STACK, PROG_TYPE, ATTACH_TYPE, + MEMORY_PEAK, FILE_NAME, PROG_NAME, @@ -155,13 +157,27 @@ struct filter { bool abs; }; -struct var_preset { - char *name; +struct rvalue { enum { INTEGRAL, ENUMERATOR } type; union { long long ivalue; char *svalue; }; +}; + +struct field_access { + enum { FIELD_NAME, ARRAY_INDEX } type; + union { + char *name; + struct rvalue index; + }; +}; + +struct var_preset { + struct field_access *atoms; + int atom_count; + char *full_name; + struct rvalue value; bool applied; }; @@ -208,6 +224,9 @@ static struct env { int top_src_lines; struct var_preset *presets; int npresets; + char orig_cgroup[PATH_MAX]; + char stat_cgroup[PATH_MAX]; + int memory_peak_fd; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -219,6 +238,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } +#define log_errno(fmt, ...) 
log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__) + +__attribute__((format(printf, 3, 4))) +static int log_errno_aux(const char *file, int line, const char *fmt, ...) +{ + int err = -errno; + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", file, line); + vfprintf(stderr, fmt, ap); + fprintf(stderr, " failed with error '%s'.\n", strerror(errno)); + va_end(ap); + return err; +} + #ifndef VERISTAT_VERSION #define VERISTAT_VERSION "<kernel>" #endif @@ -344,6 +379,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "invalid top N specifier: %s\n", arg); argp_usage(state); } + break; case 'C': env.comparison_mode = true; break; @@ -734,13 +770,13 @@ cleanup: } static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 14, + .spec_cnt = 15, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, - STACK, + STACK, MEMORY_PEAK, }, }; @@ -781,6 +817,7 @@ static struct stat_def { [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, [PROG_TYPE] = { "Program type", {"prog_type"}, }, [ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, + [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, }, }; static bool parse_stat_id_var(const char *name, size_t len, int *id, @@ -854,6 +891,18 @@ static bool is_desc_sym(char c) return c == 'v' || c == 'V' || c == '.' || c == '!' 
|| c == '_'; } +static char *rtrim(char *str) +{ + int i; + + for (i = strlen(str) - 1; i > 0; --i) { + if (!isspace(str[i])) + break; + str[i] = '\0'; + } + return str; +} + static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; @@ -1182,6 +1231,7 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch case BPF_MAP_TYPE_TASK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: break; case BPF_MAP_TYPE_STRUCT_OPS: mask_unrelated_struct_ops_progs(obj, map, prog); @@ -1278,16 +1328,243 @@ static int max_verifier_log_size(void) return log_size; } +static bool output_stat_enabled(int id) +{ + int i; + + for (i = 0; i < env.output_spec.spec_cnt; i++) + if (env.output_spec.ids[i] == id) + return true; + return false; +} + +__attribute__((format(printf, 2, 3))) +static int write_one_line(const char *file, const char *fmt, ...) +{ + int err, saved_errno; + va_list ap; + FILE *f; + + f = fopen(file, "w"); + if (!f) + return -1; + + va_start(ap, fmt); + errno = 0; + err = vfprintf(f, fmt, ap); + saved_errno = errno; + va_end(ap); + fclose(f); + errno = saved_errno; + return err < 0 ? -1 : 0; +} + +__attribute__((format(scanf, 3, 4))) +static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...) 
+{ + int res = 0, saved_errno = 0; + char *line = NULL; + size_t line_len; + va_list ap; + FILE *f; + + f = fopen(file, "r"); + if (!f) + return -1; + + va_start(ap, fmt); + while (getline(&line, &line_len, f) > 0) { + res = vsscanf(line, fmt, ap); + if (res == fields_expected) + goto out; + } + if (ferror(f)) { + saved_errno = errno; + res = -1; + } + +out: + va_end(ap); + free(line); + fclose(f); + errno = saved_errno; + return res; +} + +static void destroy_stat_cgroup(void) +{ + char buf[PATH_MAX]; + int err; + + close(env.memory_peak_fd); + + if (env.orig_cgroup[0]) { + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) + log_errno("moving self to original cgroup %s\n", env.orig_cgroup); + } + + if (env.stat_cgroup[0]) { + err = rmdir(env.stat_cgroup); + if (err < 0) + log_errno("deletion of cgroup %s", env.stat_cgroup); + } + + env.memory_peak_fd = -1; + env.orig_cgroup[0] = 0; + env.stat_cgroup[0] = 0; +} + +/* + * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>, + * moves current process to this cgroup. 
+ */ +static void create_stat_cgroup(void) +{ + char cgroup_fs_mount[4096]; + char buf[4096]; + int err; + + env.memory_peak_fd = -1; + + if (!output_stat_enabled(MEMORY_PEAK)) + return; + + err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s", + cgroup_fs_mount, buf); + if (err != 2) { + if (err < 0) + log_errno("reading /proc/self/mounts"); + else if (!env.quiet) + fprintf(stderr, "Can't find cgroupfs v2 mount point.\n"); + goto err_out; + } + + /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */ + err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf); + if (err != 1) { + if (err < 0) + log_errno("reading /proc/self/cgroup"); + else if (!env.quiet) + fprintf(stderr, "Can't infer veristat process cgroup."); + goto err_out; + } + + snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf); + + snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid()); + err = mkdir(buf, 0777); + if (err < 0) { + log_errno("creation of cgroup %s", buf); + goto err_out; + } + strcpy(env.stat_cgroup, buf); + + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) { + log_errno("entering cgroup %s", buf); + goto err_out; + } + + snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup); + env.memory_peak_fd = open(buf, O_RDWR | O_APPEND); + if (env.memory_peak_fd < 0) { + log_errno("opening %s", buf); + goto err_out; + } + + return; + +err_out: + if (!env.quiet) + fprintf(stderr, "Memory usage metric unavailable.\n"); + destroy_stat_cgroup(); +} + +/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */ +static long cgroup_memory_peak(void) +{ + long err, memory_peak; + char buf[32]; + + if (env.memory_peak_fd < 0) + return -1; + + err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0); + if (err <= 0) { + log_errno("pread(%s/memory.peak)", env.stat_cgroup); + return -1; + } + + buf[err] = 0; + errno = 
0; + memory_peak = strtoll(buf, NULL, 10); + if (errno) { + log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf); + return -1; + } + + return memory_peak; +} + +static int reset_stat_cgroup(void) +{ + char buf[] = "r\n"; + int err; + + if (env.memory_peak_fd < 0) + return -1; + + err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0); + if (err <= 0) { + log_errno("pwrite(%s/memory.peak)", env.stat_cgroup); + return -1; + } + return 0; +} + +static int parse_rvalue(const char *val, struct rvalue *rvalue) +{ + long long value; + char *val_end; + + if (val[0] == '-' || isdigit(val[0])) { + /* must be a number */ + errno = 0; + value = strtoll(val, &val_end, 0); + if (errno == ERANGE) { + errno = 0; + value = strtoull(val, &val_end, 0); + } + if (errno || *val_end != '\0') { + fprintf(stderr, "Failed to parse value '%s'\n", val); + return -EINVAL; + } + rvalue->ivalue = value; + rvalue->type = INTEGRAL; + } else { + /* if not a number, consider it enum value */ + rvalue->svalue = strdup(val); + if (!rvalue->svalue) + return -ENOMEM; + rvalue->type = ENUMERATOR; + } + return 0; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); const char *prog_name = bpf_program__name(prog); + long mem_peak_a, mem_peak_b, mem_peak = -1; char *buf; int buf_sz, log_level; struct verif_stats *stats; struct bpf_prog_info info; __u32 info_len = sizeof(info); - int err = 0; + int err = 0, cgroup_err; void *tmp; int fd; @@ -1332,7 +1609,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_reg_invariants) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); - err = bpf_object__load(obj); + err = bpf_object__prepare(obj); + if (!err) { + cgroup_err = reset_stat_cgroup(); + mem_peak_a = cgroup_memory_peak(); + err = bpf_object__load(obj); + mem_peak_b = cgroup_memory_peak(); + if (!cgroup_err && 
mem_peak_a >= 0 && mem_peak_b >= 0) + mem_peak = mem_peak_b - mem_peak_a; + } env.progs_processed++; stats->file_name = strdup(base_filename); @@ -1341,6 +1626,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats->stats[SIZE] = bpf_program__insn_cnt(prog); stats->stats[PROG_TYPE] = bpf_program__type(prog); stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog); + stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024); memset(&info, 0, info_len); fd = bpf_program__fd(prog); @@ -1361,15 +1647,74 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf free(buf); return 0; -}; +} + +static int append_preset_atom(struct var_preset *preset, char *value, bool is_index) +{ + struct field_access *tmp; + int i = preset->atom_count; + int err; + + tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms)); + if (!tmp) + return -ENOMEM; + + preset->atoms = tmp; + preset->atom_count++; + + if (is_index) { + preset->atoms[i].type = ARRAY_INDEX; + err = parse_rvalue(value, &preset->atoms[i].index); + if (err) + return err; + } else { + preset->atoms[i].type = FIELD_NAME; + preset->atoms[i].name = strdup(value); + if (!preset->atoms[i].name) + return -ENOMEM; + } + return 0; +} + +static int parse_var_atoms(const char *full_var, struct var_preset *preset) +{ + char expr[256], var[256], *name, *saveptr; + int n, len, off, err; + + snprintf(expr, sizeof(expr), "%s", full_var); + preset->atom_count = 0; + while ((name = strtok_r(preset->atom_count ? 
NULL : expr, ".", &saveptr))) { + len = strlen(name); + /* parse variable name */ + if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1) { + fprintf(stderr, "Can't parse %s\n", name); + return -EINVAL; + } + err = append_preset_atom(preset, var, false); + if (err) + return err; + + /* parse optional array indexes */ + while (off < len) { + if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1) { + fprintf(stderr, "Can't parse %s as index\n", name + off); + return -EINVAL; + } + err = append_preset_atom(preset, var, true); + if (err) + return err; + off += n; + } + } + return 0; +} static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr) { void *tmp; struct var_preset *cur; - char var[256], val[256], *val_end; - long long value; - int n; + char var[256], val[256]; + int n, err; tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets)); if (!tmp) @@ -1379,37 +1724,25 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char * memset(cur, 0, sizeof(*cur)); (*cnt)++; - if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) { + if (sscanf(expr, " %[][a-zA-Z0-9_. 
] = %s %n", var, val, &n) != 2 || n != strlen(expr)) { fprintf(stderr, "Failed to parse expression '%s'\n", expr); return -EINVAL; } + /* Remove trailing spaces from var, as scanf may add those */ + rtrim(var); - if (val[0] == '-' || isdigit(val[0])) { - /* must be a number */ - errno = 0; - value = strtoll(val, &val_end, 0); - if (errno == ERANGE) { - errno = 0; - value = strtoull(val, &val_end, 0); - } - if (errno || *val_end != '\0') { - fprintf(stderr, "Failed to parse value '%s'\n", val); - return -EINVAL; - } - cur->ivalue = value; - cur->type = INTEGRAL; - } else { - /* if not a number, consider it enum value */ - cur->svalue = strdup(val); - if (!cur->svalue) - return -ENOMEM; - cur->type = ENUMERATOR; - } + err = parse_rvalue(val, &cur->value); + if (err) + return err; - cur->name = strdup(var); - if (!cur->name) + cur->full_name = strdup(var); + if (!cur->full_name) return -ENOMEM; + err = parse_var_atoms(var, cur); + if (err) + return err; + return 0; } @@ -1486,22 +1819,96 @@ static bool is_preset_supported(const struct btf_type *t) return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); } -const int btf_find_member(const struct btf *btf, - const struct btf_type *parent_type, - __u32 parent_offset, - const char *member_name, - int *member_tid, - __u32 *member_offset) +static int find_enum_value(const struct btf *btf, const char *name, long long *value) +{ + const struct btf_type *t; + int cnt, i; + long long lvalue; + + cnt = btf__type_cnt(btf); + for (i = 1; i != cnt; ++i) { + t = btf__type_by_id(btf, i); + + if (!btf_is_any_enum(t)) + continue; + + if (enum_value_from_name(btf, t, name, &lvalue) == 0) { + *value = lvalue; + return 0; + } + } + return -ESRCH; +} + +static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result) +{ + int err = 0; + + switch (rvalue->type) { + case INTEGRAL: + *result = rvalue->ivalue; + return 0; + case ENUMERATOR: + err = find_enum_value(btf, rvalue->svalue, result); + if (err) { + 
fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue); + return err; + } + return 0; + default: + fprintf(stderr, "Unknown rvalue type\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_access *atom, + const char *array_name, struct btf_var_secinfo *sinfo) +{ + const struct btf_type *t; + struct btf_array *barr; + long long idx; + int err; + + tid = btf__resolve_type(btf, tid); + t = btf__type_by_id(btf, tid); + if (!btf_is_array(t)) { + fprintf(stderr, "Array index is not expected for %s\n", + array_name); + return -EINVAL; + } + barr = btf_array(t); + err = resolve_rvalue(btf, &atom->index, &idx); + if (err) + return err; + if (idx < 0 || idx >= barr->nelems) { + fprintf(stderr, "Array index %lld is out of bounds [0, %u): %s\n", + idx, barr->nelems, array_name); + return -EINVAL; + } + sinfo->size = btf__resolve_size(btf, barr->type); + sinfo->offset += sinfo->size * idx; + sinfo->type = btf__resolve_type(btf, barr->type); + return 0; +} + +static int adjust_var_secinfo_member(const struct btf *btf, + const struct btf_type *parent_type, + __u32 parent_offset, + const char *member_name, + struct btf_var_secinfo *sinfo) { int i; - if (!btf_is_composite(parent_type)) + if (!btf_is_composite(parent_type)) { + fprintf(stderr, "Can't resolve field %s for non-composite type\n", member_name); return -EINVAL; + } for (i = 0; i < btf_vlen(parent_type); ++i) { const struct btf_member *member; const struct btf_type *member_type; - int tid; + int tid, off; member = btf_members(parent_type) + i; tid = btf__resolve_type(btf, member->type); @@ -1509,6 +1916,7 @@ const int btf_find_member(const struct btf *btf, return -EINVAL; member_type = btf__type_by_id(btf, tid); + off = parent_offset + member->offset; if (member->name_off) { const char *name = btf__name_by_offset(btf, member->name_off); @@ -1518,48 +1926,62 @@ const int btf_find_member(const struct btf *btf, name); return -EINVAL; } - 
*member_offset = parent_offset + member->offset; - *member_tid = tid; + sinfo->offset += off / 8; + sinfo->type = tid; + sinfo->size = member_type->size; return 0; } } else if (btf_is_composite(member_type)) { int err; - err = btf_find_member(btf, member_type, parent_offset + member->offset, - member_name, member_tid, member_offset); + err = adjust_var_secinfo_member(btf, member_type, off, + member_name, sinfo); if (!err) return 0; } } - return -EINVAL; + return -ESRCH; } static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, - struct btf_var_secinfo *sinfo, const char *var) + struct btf_var_secinfo *sinfo, struct var_preset *preset) { - char expr[256], *saveptr; - const struct btf_type *base_type, *member_type; - int err, member_tid; - char *name; - __u32 member_offset = 0; + const struct btf_type *base_type; + const char *prev_name; + int err, i; + int tid; - base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); - snprintf(expr, sizeof(expr), "%s", var); - strtok_r(expr, ".", &saveptr); + assert(preset->atom_count > 0); + assert(preset->atoms[0].type == FIELD_NAME); - while ((name = strtok_r(NULL, ".", &saveptr))) { - err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset); - if (err) { - fprintf(stderr, "Could not find member %s for variable %s\n", name, var); - return err; + tid = btf__resolve_type(btf, t->type); + base_type = btf__type_by_id(btf, tid); + prev_name = preset->atoms[0].name; + + for (i = 1; i < preset->atom_count; ++i) { + struct field_access *atom = preset->atoms + i; + + switch (atom->type) { + case ARRAY_INDEX: + err = adjust_var_secinfo_array(btf, tid, atom, prev_name, sinfo); + break; + case FIELD_NAME: + err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo); + if (err == -ESRCH) + fprintf(stderr, "Can't find '%s'\n", atom->name); + prev_name = atom->name; + break; + default: + fprintf(stderr, "Unknown field_access type\n"); + return -EOPNOTSUPP; } - member_type = 
btf__type_by_id(btf, member_tid); - sinfo->offset += member_offset / 8; - sinfo->size = member_type->size; - sinfo->type = member_tid; - base_type = member_type; + if (err) + return err; + base_type = btf__type_by_id(btf, sinfo->type); + tid = sinfo->type; } + return 0; } @@ -1569,7 +1991,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, { const struct btf_type *base_type; void *ptr; - long long value = preset->ivalue; + long long value = preset->value.ivalue; size_t size; base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type)); @@ -1578,22 +2000,23 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, return -EINVAL; } if (!is_preset_supported(base_type)) { - fprintf(stderr, "Setting value for type %s is not supported\n", - btf__name_by_offset(btf, base_type->name_off)); + fprintf(stderr, "Can't set %s. Only ints and enums are supported\n", + preset->full_name); return -EINVAL; } - if (preset->type == ENUMERATOR) { + if (preset->value.type == ENUMERATOR) { if (btf_is_any_enum(base_type)) { - if (enum_value_from_name(btf, base_type, preset->svalue, &value)) { + if (enum_value_from_name(btf, base_type, preset->value.svalue, &value)) { fprintf(stderr, "Failed to find integer value for enum element %s\n", - preset->svalue); + preset->value.svalue); return -EINVAL; } } else { fprintf(stderr, "Value %s is not supported for type %s\n", - preset->svalue, btf__name_by_offset(btf, base_type->name_off)); + preset->value.svalue, + btf__name_by_offset(btf, base_type->name_off)); return -EINVAL; } } @@ -1660,20 +2083,16 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (j = 0; j < n; ++j, ++sinfo) { const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); const char *var_name; - int var_len; if (!btf_is_var(var_type)) continue; var_name = btf__name_by_offset(btf, var_type->name_off); - var_len = strlen(var_name); for (k = 0; k < npresets; ++k) { struct btf_var_secinfo 
tmp_sinfo; - if (strncmp(var_name, presets[k].name, var_len) != 0 || - (presets[k].name[var_len] != '\0' && - presets[k].name[var_len] != '.')) + if (strcmp(var_name, presets[k].atoms[0].name) != 0) continue; if (presets[k].applied) { @@ -1683,7 +2102,7 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i } tmp_sinfo = *sinfo; err = adjust_var_secinfo(btf, var_type, - &tmp_sinfo, presets[k].name); + &tmp_sinfo, presets + k); if (err) return err; @@ -1698,7 +2117,8 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (i = 0; i < npresets; ++i) { if (!presets[i].applied) { fprintf(stderr, "Global variable preset %s has not been applied\n", - presets[i].name); + presets[i].full_name); + err = -EINVAL; } presets[i].applied = false; } @@ -1824,6 +2244,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: + case MEMORY_PEAK: case MARK_READ_MAX_LEN: { long v1 = s1->stats[id]; long v2 = s2->stats[id]; @@ -2053,6 +2474,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case STACK: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: *val = s ? 
s->stats[id] : 0; break; default: @@ -2139,6 +2561,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case MARK_READ_MAX_LEN: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: case STACK: { long val; int err, n; @@ -2776,7 +3199,7 @@ static void output_prog_stats(void) static int handle_verif_mode(void) { - int i, err; + int i, err = 0; if (env.filename_cnt == 0) { fprintf(stderr, "Please provide path to BPF object file!\n\n"); @@ -2784,11 +3207,12 @@ static int handle_verif_mode(void) return -EINVAL; } + create_stat_cgroup(); for (i = 0; i < env.filename_cnt; i++) { err = process_obj(env.filenames[i]); if (err) { fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; + goto out; } } @@ -2796,7 +3220,9 @@ static int handle_verif_mode(void) output_prog_stats(); - return 0; +out: + destroy_stat_cgroup(); + return err; } static int handle_replay_mode(void) @@ -2826,7 +3252,7 @@ static int handle_replay_mode(void) int main(int argc, char **argv) { - int err = 0, i; + int err = 0, i, j; if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) return 1; @@ -2885,9 +3311,19 @@ int main(int argc, char **argv) } free(env.deny_filters); for (i = 0; i < env.npresets; ++i) { - free(env.presets[i].name); - if (env.presets[i].type == ENUMERATOR) - free(env.presets[i].svalue); + free(env.presets[i].full_name); + for (j = 0; j < env.presets[i].atom_count; ++j) { + switch (env.presets[i].atoms[j].type) { + case FIELD_NAME: + free(env.presets[i].atoms[j].name); + break; + case ARRAY_INDEX: + if (env.presets[i].atoms[j].index.type == ENUMERATOR) + free(env.presets[i].atoms[j].index.svalue); + break; + } + } + free(env.presets[i].atoms); } free(env.presets); return -err; diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 79505d294c44..2f869daf8a06 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -43,6 +43,15 @@ riscv64) 
BZIMAGE="arch/riscv/boot/Image" ARCH="riscv" ;; +ppc64el) + QEMU_BINARY=qemu-system-ppc64 + QEMU_CONSOLE="hvc0" + # KVM could not be tested for powerpc, therefore not enabled for now. + HOST_FLAGS=(-machine pseries -cpu POWER9) + CROSS_FLAGS=(-machine pseries -cpu POWER9) + BZIMAGE="vmlinux" + ARCH="powerpc" + ;; *) echo "Unsupported architecture" exit 1 diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 0ced4026ee44..a29de0713f19 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -109,6 +109,8 @@ #include <network_helpers.h> +#define MAX_TX_BUDGET_DEFAULT 32 + static bool opt_verbose; static bool opt_print_tests; static enum test_mode opt_mode = TEST_MODE_ALL; @@ -1091,11 +1093,45 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) return true; } +static u32 load_value(u32 *counter) +{ + return __atomic_load_n(counter, __ATOMIC_ACQUIRE); +} + +static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret) +{ + u32 max_budget = MAX_TX_BUDGET_DEFAULT; + u32 cons, ready_to_send; + int delta; + + cons = load_value(xsk->tx.consumer); + ready_to_send = load_value(xsk->tx.producer) - cons; + *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + + delta = load_value(xsk->tx.consumer) - cons; + /* By default, xsk should consume exact @max_budget descs at one + * send in this case where hitting the max budget limit in while + * loop is triggered in __xsk_generic_xmit(). Please make sure that + * the number of descs to be sent is larger than @max_budget, or + * else the tx.consumer will be updated in xskq_cons_peek_desc() + * in time which hides the issue we try to verify. 
+ */ + if (ready_to_send > max_budget && delta != max_budget) + return false; + + return true; +} + static int kick_tx(struct xsk_socket_info *xsk) { int ret; - ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (xsk->check_consumer) { + if (!kick_tx_with_check(xsk, &ret)) + return TEST_FAILURE; + } else { + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + } if (ret >= 0) return TEST_PASS; if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { @@ -2613,6 +2649,23 @@ static int testapp_adjust_tail_grow_mb(struct test_spec *test) XSK_UMEM__LARGE_FRAME_SIZE * 2); } +static int testapp_tx_queue_consumer(struct test_spec *test) +{ + int nr_packets; + + if (test->mode == TEST_MODE_ZC) { + ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n"); + return TEST_SKIP; + } + + nr_packets = MAX_TX_BUDGET_DEFAULT + 1; + pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE); + test->ifobj_tx->xsk->batch_size = nr_packets; + test->ifobj_tx->xsk->check_consumer = true; + + return testapp_validate_traffic(test); +} + static void run_pkt_test(struct test_spec *test) { int ret; @@ -2723,6 +2776,7 @@ static const struct test_spec tests[] = { {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, + {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer}, }; static void print_tests(void) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 67fc44b2813b..4df3a5d329ac 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -95,6 +95,7 @@ struct xsk_socket_info { u32 batch_size; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; + bool check_consumer; }; struct pkt { diff --git 
a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index be780bcb73a3..3556f3563e08 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -12,14 +12,17 @@ TEST_GEN_FILES := \ TEST_PROGS := \ napi_id.py \ netcons_basic.sh \ + netcons_cmdline.sh \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ netcons_sysdata.sh \ + netpoll_basic.py \ ping.py \ queues.py \ stats.py \ shaper.py \ hds.py \ + xdp.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index df2c047ffa90..fdc97355588c 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -16,6 +16,7 @@ TEST_PROGS = \ irq.py \ loopback.sh \ pp_alloc_fail.py \ + rss_api.py \ rss_ctx.py \ rss_input_xfrm.py \ tso.py \ diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py new file mode 100755 index 000000000000..ead6784d1910 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -0,0 +1,465 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Devlink Rate TC Bandwidth Test Suite +=================================== + +This test suite verifies the functionality of devlink-rate traffic class (TC) +bandwidth distribution in a virtualized environment. The tests validate that +bandwidth can be properly allocated between different traffic classes and +that TC mapping works as expected. + +Test Environment: +---------------- +- Creates 1 VF +- Establishes a bridge connecting the VF representor and the uplink representor +- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102) +- Configures different traffic classes (TC3 and TC4) for each VLAN + +Test Cases: +---------- +1. 
test_no_tc_mapping_bandwidth: + - Verifies that without TC mapping, bandwidth is NOT distributed according to + the configured 80/20 split between TC4 and TC3 + - This test should fail if bandwidth matches the 80/20 split without TC + mapping + - Expected: Bandwidth should NOT be distributed as 80/20 + +2. test_tc_mapping_bandwidth: + - Configures TC mapping using mqprio qdisc + - Verifies that with TC mapping, bandwidth IS distributed according to the + configured 80/20 split between TC4 and TC3 + - Expected: Bandwidth should be distributed as 80/20 + +Bandwidth Distribution: +---------------------- +- TC3 (VLAN 101): Configured for 20% of total bandwidth +- TC4 (VLAN 102): Configured for 80% of total bandwidth +- Total bandwidth: 1Gbps +- Tolerance: +-12% + +Hardware-Specific Behavior (mlx5): +-------------------------- +mlx5 hardware enforces traffic class separation by ensuring that each transmit +queue (SQ) is associated with a single TC. If a packet is sent on a queue that +doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set +mapping), the hardware moves the queue to the correct TC scheduler to preserve +traffic isolation. + +This behavior means that even without explicit TC-to-queue mapping, bandwidth +enforcement may still appear to work—because the hardware dynamically adjusts +the scheduling context. However, this can lead to performance issues in high +rates and HOL blocking if traffic from different TCs is mixed on the same queue. +""" + +import json +import os +import subprocess +import threading +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit +from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx +from lib.py import NetDrvEpEnv, DevlinkFamily +from lib.py import NlError +from lib.py import cmd, defer, ethtool, ip + + +class BandwidthValidator: + """ + Validates bandwidth totals and per-TC shares against expected values + with a tolerance.
+ """ + + def __init__(self): + self.tolerance_percent = 12 + self.expected_total_gbps = 1.0 + self.total_min_expected = self.min_expected(self.expected_total_gbps) + self.total_max_expected = self.max_expected(self.expected_total_gbps) + self.tc_expected_percent = { + 3: 20.0, + 4: 80.0, + } + + def min_expected(self, value): + """Calculates the minimum acceptable value based on tolerance.""" + return value - (value * self.tolerance_percent / 100) + + def max_expected(self, value): + """Calculates the maximum acceptable value based on tolerance.""" + return value + (value * self.tolerance_percent / 100) + + def bound(self, expected, value): + """Returns True if value is within expected tolerance.""" + return self.min_expected(expected) <= value <= self.max_expected(expected) + + def tc_bandwidth_bound(self, value, tc_ix): + """ + Returns True if the given bandwidth value is within tolerance + for the TC's expected bandwidth. + """ + expected = self.tc_expected_percent[tc_ix] + return self.bound(expected, value) + + +def setup_vf(cfg, set_tc_mapping=True): + """ + Sets up a VF on the given network interface. + + Enables SR-IOV and switchdev mode, brings the VF interface up, + and optionally configures TC mapping using mqprio. 
+ """ + try: + cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev") + defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc + try: + cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs") + defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc + + time.sleep(2) + vf_ifc = (os.listdir( + f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0] + if vf_ifc: + ip(f"link set dev {vf_ifc} up") + else: + raise KsftSkipEx("VF interface not found") + if set_tc_mapping: + cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8") + + return vf_ifc + + +def setup_vlans_on_vf(vf_ifc): + """ + Sets up two VLAN interfaces on the given VF, each mapped to a different TC. + """ + vlan_configs = [ + {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"}, + {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"}, + ] + + for config in vlan_configs: + vlan_dev = f"{vf_ifc}.{config['vlan_id']}" + ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}") + ip(f"addr add {config['ip']}/29 dev {vlan_dev}") + ip(f"link set dev {vlan_dev} up") + ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}") + ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}") + + +def get_vf_info(cfg): + """ + Finds the VF representor interface and devlink port index + for the given PCI device used in the test environment. 
+ """ + cfg.vf_representor = None + cfg.vf_port_index = None + out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8") + ports = json.loads(out)["port"] + + for port_name, props in ports.items(): + netdev = props.get("netdev") + + if (port_name.startswith(f"pci/{cfg.pci}/") and + props.get("vfnum") == 0): + cfg.vf_representor = netdev + cfg.vf_port_index = int(port_name.split("/")[-1]) + break + + +def setup_bridge(cfg): + """ + Creates and configures a Linux bridge, with both the uplink + and VF representor interfaces attached to it. + """ + bridge_name = f"br_{os.getpid()}" + ip(f"link add name {bridge_name} type bridge") + defer(cmd, f"ip link del name {bridge_name} type bridge") + + ip(f"link set dev {cfg.ifname} master {bridge_name}") + + rep_name = cfg.vf_representor + if rep_name: + ip(f"link set dev {rep_name} master {bridge_name}") + ip(f"link set dev {rep_name} up") + ksft_pr(f"Set representor {rep_name} up and added to bridge") + else: + raise KsftSkipEx("Could not find representor for the VF") + + ip(f"link set dev {bridge_name} up") + + +def setup_devlink_rate(cfg): + """ + Configures devlink rate tx_max and traffic class bandwidth for the VF. 
+ """ + port_index = cfg.vf_port_index + if port_index is None: + raise KsftSkipEx("Could not find VF port index") + try: + cfg.devnl.rate_set({ + "bus-name": "pci", + "dev-name": cfg.pci, + "port-index": port_index, + "rate-tx-max": 125000000, + "rate-tc-bws": [ + {"index": 0, "bw": 0}, + {"index": 1, "bw": 0}, + {"index": 2, "bw": 0}, + {"index": 3, "bw": 20}, + {"index": 4, "bw": 80}, + {"index": 5, "bw": 0}, + {"index": 6, "bw": 0}, + {"index": 7, "bw": 0}, + ] + }) + except NlError as exc: + if exc.error == 95: # EOPNOTSUPP + raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc + raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc + + +def setup_remote_server(cfg): + """ + Sets up VLAN interfaces and starts iperf3 servers on the remote side. + """ + remote_dev = cfg.remote_ifname + vlan_ids = [101, 102] + remote_ips = ["198.51.100.1", "198.51.100.9"] + + for vlan_id, ip_addr in zip(vlan_ids, remote_ips): + vlan_dev = f"{remote_dev}.{vlan_id}" + cmd(f"ip link add link {remote_dev} name {vlan_dev} " + f"type vlan id {vlan_id}", host=cfg.remote) + cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote) + cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote) + cmd(f"iperf3 -s -1 -B {ip_addr}",background=True, host=cfg.remote) + defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote) + + +def setup_test_environment(cfg, set_tc_mapping=True): + """ + Sets up the complete test environment including VF creation, VLANs, + bridge configuration, devlink rate setup, and the remote server. + """ + vf_ifc = setup_vf(cfg, set_tc_mapping) + ksft_pr(f"Created VF interface: {vf_ifc}") + + setup_vlans_on_vf(vf_ifc) + + get_vf_info(cfg) + setup_bridge(cfg) + + setup_devlink_rate(cfg) + setup_remote_server(cfg) + time.sleep(2) + + +def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1): + """ + Runs a single iperf3 client instance, binding to the given local IP. 
+ Waits on a barrier to synchronize with other threads. + """ + try: + barrier.wait(timeout=10) + except Exception as exc: + raise KsftFailEx("iperf3 barrier wait timed") from exc + + iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"] + result = subprocess.run(iperf_cmd, capture_output=True, text=True, + check=True) + + try: + output = json.loads(result.stdout) + bits_per_second = output["end"]["sum_received"]["bits_per_second"] + gbps = bits_per_second / 1e9 + if gbps < min_expected_gbps: + ksft_pr( + f"iperf3 bandwidth too low: {gbps:.2f} Gbps " + f"(expected ≥ {min_expected_gbps} Gbps)" + ) + return None + return gbps + except json.JSONDecodeError as exc: + ksft_pr(f"Failed to parse iperf3 JSON output: {exc}") + return None + + +def run_bandwidth_test(): + """ + Launches iperf3 client threads for each VLAN/TC pair and collects results. + """ + def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix): + results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier) + + vf_vlan_data = [ + # (local_ip, remote_ip, TC) + ("198.51.100.2", "198.51.100.1", 3), + ("198.51.100.10", "198.51.100.9", 4), + ] + + results = {} + threads = [] + start_barrier = threading.Barrier(len(vf_vlan_data)) + + for local_ip, remote_ip, tc_ix in vf_vlan_data: + thread = threading.Thread( + target=_run_iperf_client_thread, + args=(remote_ip, local_ip, results, start_barrier, tc_ix) + ) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + + for tc_ix, tc_bw in results.items(): + if tc_bw is None: + raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth") + + return results + +def calculate_bandwidth_percentages(results): + """ + Calculates the percentage of total bandwidth received by TC3 and TC4. 
+ """ + if 3 not in results or 4 not in results: + raise KsftFailEx(f"Missing expected TC results in {results}") + + tc3_bw = results[3] + tc4_bw = results[4] + total_bw = tc3_bw + tc4_bw + tc3_percentage = (tc3_bw / total_bw) * 100 + tc4_percentage = (tc4_bw / total_bw) * 100 + + return { + 'tc3_bw': tc3_bw, + 'tc4_bw': tc4_bw, + 'tc3_percentage': tc3_percentage, + 'tc4_percentage': tc4_percentage, + 'total_bw': total_bw + } + + +def print_bandwidth_results(bw_data, test_name): + """ + Prints bandwidth measurements and TC usage summary for a given test. + """ + ksft_pr(f"Bandwidth check results {test_name}:") + ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec") + ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%") + ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%") + + +def verify_total_bandwidth(bw_data, validator): + """ + Ensures the total measured bandwidth falls within the acceptable tolerance. + """ + total = bw_data['total_bw'] + + if validator.bound(validator.expected_total_gbps, total): + return + + if total < validator.total_min_expected: + raise KsftSkipEx( + f"Total bandwidth {total:.2f} Gbps < minimum " + f"{validator.total_min_expected:.2f} Gbps; " + f"parent tx_max ({validator.expected_total_gbps:.1f} G) " + f"not reached, cannot validate share" + ) + + raise KsftFailEx( + f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling " + f"{validator.total_max_expected:.2f} Gbps " + f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)" + ) + + +def check_bandwidth_distribution(bw_data, validator): + """ + Checks whether the measured TC3 and TC4 bandwidth percentages + fall within their expected tolerance ranges. + + Returns: + bool: True if both TC3 and TC4 percentages are within bounds. 
+ """ + tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3) + tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4) + + return tc3_valid and tc4_valid + + +def run_bandwidth_distribution_test(cfg, set_tc_mapping): + """ + Runs parallel iperf3 tests for both TCs and collects results. + """ + setup_test_environment(cfg, set_tc_mapping) + bandwidths = run_bandwidth_test() + bw_data = calculate_bandwidth_percentages(bandwidths) + test_name = "with TC mapping" if set_tc_mapping else "without TC mapping" + print_bandwidth_results(bw_data, test_name) + + verify_total_bandwidth(bw_data, cfg.bw_validator) + + return check_bandwidth_distribution(bw_data, cfg.bw_validator) + + +def test_no_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is not split 80/20 without traffic class mapping. + """ + pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping" + fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping" + is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout + + if run_bandwidth_distribution_test(cfg, set_tc_mapping=False): + if is_mlx5: + raise KsftXfailEx(fail_bw_msg) + raise KsftFailEx(fail_bw_msg) + if is_mlx5: + raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg) + ksft_pr(pass_bw_msg) + + +def test_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is correctly split 80/20 between TC3 and TC4 + when traffic class mapping is set. + """ + if run_bandwidth_distribution_test(cfg, set_tc_mapping=True): + ksft_pr("Bandwidth is distributed as 80/20 with TC mapping") + else: + raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping") + + +def main() -> None: + """ + Main entry point for running the test cases. 
+ """ + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.devnl = DevlinkFamily() + + cfg.pci = os.path.basename( + os.path.realpath(f"/sys/class/net/{cfg.ifname}/device") + ) + if not cfg.pci: + raise KsftSkipEx("Could not get PCI address of the interface") + cfg.require_cmd("iperf3", local=True, remote=True) + + cfg.bw_validator = BandwidthValidator() + + cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth] + + ksft_run(cases=cases, args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 7947650210a0..baa2f24240ba 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -51,15 +51,14 @@ def check_tx(cfg) -> None: @ksft_disruptive def check_tx_chunks(cfg) -> None: - cfg.require_ipver("6") require_devmem(cfg) port = rand_port() - listen_cmd = f"socat -U - TCP6-LISTEN:{port}" + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" with bkg(listen_cmd, exit_wait=True) as socat: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 9c03fd777f3d..712c806508b5 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -3,37 +3,37 @@ import re from os import path -from lib.py import ksft_run, ksft_exit +from lib.py import ksft_run, ksft_exit, KsftSkipEx from lib.py import NetDrvEpEnv from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen def _get_current_settings(cfg): - output 
= ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0] + output = ethtool(f"-g {cfg.ifname}", json=True)[0] return (output['rx'], output['hds-thresh']) def _get_combined_channels(cfg): - output = ethtool(f"-l {cfg.ifname}", host=cfg.remote).stdout + output = ethtool(f"-l {cfg.ifname}").stdout values = re.findall(r'Combined:\s+(\d+)', output) return int(values[1]) def _create_rss_ctx(cfg, chan): - output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout + output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout values = re.search(r'New RSS context is (\d+)', output).group(1) ctx_id = int(values) - return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote)) + return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}")) def _set_flow_rule(cfg, port, chan): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) def _set_flow_rule_rss(cfg, port, ctx_id): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) @@ -47,26 +47,26 @@ def test_zcrx(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 
0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def test_zcrx_oneshot(cfg) -> None: @@ -78,26 +78,26 @@ def test_zcrx_oneshot(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx 
{rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def test_zcrx_rss(cfg) -> None: @@ -109,27 +109,27 @@ def test_zcrx_rss(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} 
equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def main() -> None: diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index b582885786f5..1462a339a74b 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -7,8 +7,25 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * from drivers.net.lib.py import * + + # Import one by one to avoid pylint false positives + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ + rand_port, tool, wait_port_listen + from net.lib.py import fd_read_timeout + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, 
ksft_run, \ + ksft_setup + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true + from net.lib.py import NetNSEnter + from drivers.net.lib.py import GenerateTraffic + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 02e4d3d7ded2..72f828021f83 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -526,12 +526,10 @@ static struct netdev_queue_id *create_queues(void) struct netdev_queue_id *queues; size_t i = 0; - queues = calloc(num_queues, sizeof(*queues)); + queues = netdev_queue_id_alloc(num_queues); for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; + netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX); + netdev_queue_id_set_id(&queues[i], start_queue + i); } return queues; @@ -852,7 +850,6 @@ static int do_client(struct memory_buffer *mem) ssize_t line_size = 0; struct cmsghdr *cmsg; char *line = NULL; - unsigned long mid; size_t len = 0; int socket_fd; __u32 ddmabuf; diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py new file mode 100755 index 000000000000..19847f3d4a00 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +API level tests for RSS (mostly Netlink vs IOCTL). 
+""" + +import errno +import glob +import random +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import defer, ethtool, CmdExitFailure +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + + +def _require_2qs(cfg): + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + return qcnt + + +def _ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def _ethtool_get_cfg(cfg, fl_type, to_nl=False): + descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + if to_nl: + converter = { + "IP SA": "ip-src", + "IP DA": "ip-dst", + "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3", + } + + ret = set() + else: + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + } + + ret = "" + + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + if to_nl: + ret.add(converter[line]) + else: + ret += converter[line] + return ret + + +def test_rxfh_nl_set_fail(cfg): + """ + Test error path of Netlink SET. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-name": "lo"}, + "indir": None}) + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [100000]}) + ntf = next(ethnl.poll_ntf(duration=0.2), None) + ksft_is(ntf, None) + + +def test_rxfh_nl_set_indir(cfg): + """ + Test setting indirection table via Netlink. 
+ """ + qcnt = _require_2qs(cfg) + + # Test some SETs with a value + reset = defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, "indir": None}) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + # Test reset back to default + reset.exec() + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt))) + + +def test_rxfh_nl_set_indir_ctx(cfg): + """ + Test setting indirection table for a custom context via Netlink. 
+ """ + _require_2qs(cfg) + + # Get setting for ctx 0, we'll make sure they don't get clobbered + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + + # Create context + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + +def test_rxfh_indir_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified. 
+ """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1") + reset = defer(ethtool, f"-X {cfg.ifname} default") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after reset") + ksft_eq(ntf["name"], "rss-ntf") + ksft_is(ntf["msg"].get("context"), None) + ksft_ne(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_indir_ctx_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified on an additional RSS context. + """ + _require_2qs(cfg) + + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"].get("context"), ctx_id) + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_nl_set_key(cfg): + """ + Test setting hashing key via Netlink. 
+ """ + + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, + "hkey": dflt["hkey"], "indir": None}) + + # Empty key should error out + with ksft_raises(NlError) as cm: + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": None}) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey') + + # Set key to random + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + + # Set key to random and indir tbl to something at once + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1], "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + +def test_rxfh_fields(cfg): + """ + Test reading Rx Flow Hash over Netlink. + """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + ethnl = EthtoolFamily() + + cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + one = _ethtool_get_cfg(cfg, fl_type, to_nl=True) + ksft_eq(one, cfg_nl["flow-hash"][fl_type], + comment="Config for " + fl_type) + + +def test_rxfh_fields_set(cfg): + """ Test configuring Rx Flow Hash over Netlink. 
""" + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + + # Collect current settings + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # symmetric hashing prevents some of the configs below + if cfg_old.get("input-xfrm"): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": {}}) + + for fl_type in flow_types: + cur = _ethtool_get_cfg(cfg, fl_type) + if cur == "sdfn": + change_nl = {"ip-src", "ip-dst"} + change_ic = "sd" + else: + change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + change_ic = "sdfn" + + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {fl_type: change_nl} + }) + reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} " + f"rx-flow-hash {fl_type} {cur}") + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type], + comment=f"Config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(change_ic, cfg_ic, + comment=f"Config for {fl_type} over IOCTL") + + reset.exec() + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type], + comment=f"Un-config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL") + + # Try to set multiple at once, the defer was already installed at the start + change = {"ip-src"} + if change == cfg_old["flow-hash"]["tcp4"]: + change = {"ip-dst"} + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + 
"flow-hash": {x: change for x in flow_types} + }) + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + ksft_eq(change, cfg_nl["flow-hash"][fl_type], + comment=f"multi-config for {fl_type} over Netlink") + + +def test_rxfh_fields_set_xfrm(cfg): + """ Test changing Rx Flow Hash vs xfrm_input at once. """ + + def set_rss(cfg, xfrm, fh): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": xfrm, "flow-hash": fh}) + + # Install the reset handler + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # Make sure we start with input-xfrm off, and tcp4 config non-sym + set_rss(cfg, {}, {}) + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + # Setting sym and fixing tcp4 config not expected to pass right now + with ksft_raises(NlError): + set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}}) + # One at a time should work, hopefully + set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}}) + no_support = False + try: + set_rss(cfg, {"sym-xor"}, {}) + except NlError: + try: + set_rss(cfg, {"sym-or-xor"}, {}) + except NlError: + no_support = True + if no_support: + raise KsftSkipEx("no input-xfrm supported") + # Disabling two at once should not work either without kernel changes + with ksft_raises(NlError): + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + +def test_rxfh_fields_ntf(cfg): + """ Test Rx Flow Hash notifications. 
""" + + cur = _ethtool_get_cfg(cfg, "tcp4") + if cur == "sdfn": + change = {"ip-src", "ip-dst"} + else: + change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {"tcp4": change} + }) + reset = defer(ethtool, + f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after IOCTL change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after Netlink change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + +def test_rss_ctx_add(cfg): + """ Test creating an additional RSS context via Netlink """ + + _require_2qs(cfg) + + # Test basic creation + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_ne(ctx.get("context", 0), 0) + ksft_ne(set(ctx.get("indir", [0])), {0}, + comment="Driver should init the indirection table") + + # Try requesting the ID we just got allocated + with ksft_raises(NlError) as cm: + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx.get("context"), + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + d.exec() + ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY) + + # Test creating with a specified RSS table, and context ID + ctx_id = ctx.get("context") + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx_id, + "indir": [1], + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} 
delete") + ksft_eq(ctx.get("context"), ctx_id) + ksft_eq(set(ctx.get("indir", [0])), {1}) + + +def test_rss_ctx_ntf(cfg): + """ Test notifications for creating additional RSS contexts """ + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + # Create / delete via Netlink + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + cfg.ethnl.rss_delete_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx["context"], + }) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + ksft_eq(ctx, ntf["msg"]) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + # Create / deleve via IOCTL + ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new") + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete") + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index 648ff50bc1c3..72880e388478 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -32,16 +32,16 @@ def test_rss_input_xfrm(cfg, ipver): if 
multiprocessing.cpu_count() < 2: raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) if not hasattr(socket, "SO_INCOMING_CPU"): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") - input_xfrm = cfg.ethnl.rss_get( - {'header': {'dev-name': cfg.ifname}}).get('input-xfrm') + rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}) + input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {}))) # Check for symmetric xor/or-xor - if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): + if not input_xfrm: raise KsftSkipEx("Symmetric RSS hash not requested") cpus = set() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index 3370827409aa..c13dd5efa27a 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -34,7 +34,7 @@ def tcp_sock_get_retrans(sock): def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) port = rand_port() listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" @@ -102,7 +102,7 @@ def build_tunnel(cfg, outer_ipver, tun_info): remote_addr = cfg.remote_addr_v[outer_ipver] tun_type = tun_info[0] - tun_arg = tun_info[2] + tun_arg = tun_info[1] ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") defer(ip, f"link del {tun_type}-ksft") ip(f"link set dev {tun_type}-ksft up") @@ -119,15 +119,30 @@ def build_tunnel(cfg, outer_ipver, tun_info): return remote_v4, remote_v6 +def restore_wanted_features(cfg): + features_cmd = "" + for feature in cfg.hw_features: + setting = "on" if feature in cfg.wanted_features else "off" + features_cmd += f" {feature} {setting}" + try: + ethtool(f"-K {cfg.ifname} 
{features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure restoring wanted features: {e}") + + def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): """Construct specific tests from the common template.""" def f(cfg): cfg.require_ipver(outer_ipver) + defer(restore_wanted_features, cfg) if not cfg.have_stat_super_count and \ not cfg.have_stat_wire_count: raise KsftSkipEx(f"Device does not support LSO queue stats") + if feature not in cfg.hw_features: + raise KsftSkipEx(f"Device does not support {feature}") + ipver = outer_ipver if tun: remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) @@ -136,36 +151,21 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): remote_v4 = cfg.remote_addr_v["4"] remote_v6 = cfg.remote_addr_v["6"] - tun_partial = tun and tun[1] - # Tunnel which can silently fall back to gso-partial - has_gso_partial = tun and 'tx-gso-partial' in cfg.features - - # For TSO4 via partial we need mangleid - if ipver == "4" and feature in cfg.partial_features: - ksft_pr("Testing with mangleid enabled") - if 'tx-tcp-mangleid-segmentation' not in cfg.features: - ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") - defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") - # First test without the feature enabled. ethtool(f"-K {cfg.ifname} {feature} off") - if has_gso_partial: - ethtool(f"-K {cfg.ifname} tx-gso-partial off") run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) - # Now test with the feature enabled. - # For compatible tunnels only - just GSO partial, not specific feature. 
- if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + if feature in cfg.partial_features: ethtool(f"-K {cfg.ifname} tx-gso-partial on") - run_one_stream(cfg, ipver, remote_v4, remote_v6, - should_lso=tun_partial) + if ipver == "4": + ksft_pr("Testing with mangleid enabled") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") # Full feature enabled. - if feature in cfg.features: - ethtool(f"-K {cfg.ifname} {feature} on") - run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) - else: - raise KsftXfailEx(f"Device does not support {feature}") + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver return f @@ -176,23 +176,39 @@ def query_nic_features(cfg) -> None: cfg.have_stat_super_count = False cfg.have_stat_wire_count = False - cfg.features = set() features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) - for f in features["active"]["bits"]["bit"]: - cfg.features.add(f["name"]) + + cfg.wanted_features = set() + for f in features["wanted"]["bits"]["bit"]: + cfg.wanted_features.add(f["name"]) + + cfg.hw_features = set() + hw_all_features_cmd = "" + for f in features["hw"]["bits"]["bit"]: + if f.get("value", False): + feature = f["name"] + cfg.hw_features.add(feature) + hw_all_features_cmd += f" {feature} on" + try: + ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure enabling all hw features: {e}") + ksft_pr("partial gso feature detection may be impacted") # Check which features are supported via GSO partial cfg.partial_features = set() - if 'tx-gso-partial' in cfg.features: + if 'tx-gso-partial' in cfg.hw_features: ethtool(f"-K {cfg.ifname} tx-gso-partial off") no_partial = set() features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) for f in 
features["active"]["bits"]["bit"]: no_partial.add(f["name"]) - cfg.partial_features = cfg.features - no_partial + cfg.partial_features = cfg.hw_features - no_partial ethtool(f"-K {cfg.ifname} tx-gso-partial on") + restore_wanted_features(cfg) + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) if stats: if 'tx-hw-gso-packets' in stats[0]: @@ -211,13 +227,14 @@ def main() -> None: query_nic_features(cfg) test_info = ( - # name, v4/v6 ethtool_feature tun:(type, partial, args) - ("", "4", "tx-tcp-segmentation", None), - ("", "6", "tx-tcp6-segmentation", None), - ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), - ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), - ("gre", "4", "tx-gre-segmentation", ("gre", False, "")), - ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), + # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))), + ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))), + ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))), + ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))), ) cases = [] @@ -227,11 +244,13 @@ def main() -> None: if info[1] and outer_ipver != info[1]: continue - cases.append(test_builder(info[0], cfg, outer_ipver, info[2], - tun=info[3], inner_ipver="4")) if info[3]: - cases.append(test_builder(info[0], cfg, outer_ipver, info[2], - tun=info[3], inner_ipver="6")) + cases += [ + test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver) + for inner_ipver in info[3][2] + ] + else: + cases.append(test_builder(info[0], cfg, 
outer_ipver, info[2], None, outer_ipver)) ksft_run(cases=cases, args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 401e70f7f136..8711c67ad658 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -7,7 +7,21 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * + + # Import one by one to avoid pylint false positives + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, tool, wait_port_listen + from net.lib.py import fd_read_timeout + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 3bccddf8cbc5..1b8bd648048f 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -259,7 +259,7 @@ class NetDrvEpEnv(NetDrvEnvBase): if not self._require_cmd(comm, "local"): raise KsftSkipEx("Test requires command: " + comm) if remote: - if not self._require_cmd(comm, "remote"): + if not self._require_cmd(comm, "remote", host=self.remote): raise KsftSkipEx("Test requires (remote) command: " + comm) def wait_hw_stats_settle(self): diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py 
b/tools/testing/selftests/drivers/net/lib/py/load.py index 44151b7b1a24..c4e808407cc4 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -7,7 +7,7 @@ from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen class GenerateTraffic: def __init__(self, env, port=None): - env.require_cmd("iperf3", remote=True) + env.require_cmd("iperf3", local=True, remote=True) self.env = env diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 29b01b8e2215..b6071e80ebbb 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,11 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later -SRCIP=192.0.2.1 +SRCIP4="192.0.2.1" +SRCIP6="fc00::1" DSTIF="" # to be populated later -DSTIP=192.0.2.2 +DSTIP4="192.0.2.2" +DSTIP6="fc00::2" PORT="6666" MSG="netconsole selftest" @@ -80,7 +82,23 @@ function configure_ip() { ip link set "${SRCIF}" up } +function select_ipv4_or_ipv6() +{ + local VERSION=${1} + + if [[ "$VERSION" == "ipv6" ]] + then + DSTIP="${DSTIP6}" + SRCIP="${SRCIP6}" + else + DSTIP="${DSTIP4}" + SRCIP="${SRCIP4}" + fi +} + function set_network() { + local IP_VERSION=${1:-"ipv4"} + # setup_ns function is coming from lib.sh setup_ns NAMESPACE @@ -91,10 +109,13 @@ function set_network() { # Link both interfaces back to back link_ifaces + select_ipv4_or_ipv6 "${IP_VERSION}" configure_ip } function create_dynamic_target() { + local FORMAT=${1:-"extended"} + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') @@ -106,7 +127,33 @@ function create_dynamic_target() { echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + if [ "${FORMAT}" == "basic" ] + then + # Basic target does not support release + echo 0 > 
"${NETCONS_PATH}"/release + echo 0 > "${NETCONS_PATH}"/extended + elif [ "${FORMAT}" == "extended" ] + then + echo 1 > "${NETCONS_PATH}"/extended + fi + echo 1 > "${NETCONS_PATH}"/enabled + + # This will make sure that the kernel was able to + # load the netconsole driver configuration. The console message + # gets more organized/sequential as well. + sleep 1 +} + +# Generate the command line argument for netconsole following: +# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr] +function create_cmdline_str() { + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + SRCPORT="1514" + TGTPORT="6666" + + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\"" } # Do not append the release to the header of the message @@ -116,16 +163,9 @@ function disable_release_append() { echo 1 > "${NETCONS_PATH}"/enabled } -function cleanup() { +function do_cleanup() { local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" - # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled - # Remove all the keys that got created during the selftest - find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete - # Remove the configfs entry - rmdir "${NETCONS_PATH}" - # Delete netdevsim devices echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" @@ -137,6 +177,17 @@ function cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } +function cleanup() { + # delete netconsole dynamic reconfiguration + echo 0 > "${NETCONS_PATH}"/enabled + # Remove all the keys that got created during the selftest + find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete + # Remove the configfs entry + rmdir "${NETCONS_PATH}" + + do_cleanup +} + function set_user_data() { if [[ ! 
-d "${NETCONS_PATH}""/userdata" ]] then @@ -152,18 +203,24 @@ function set_user_data() { function listen_port_and_save_to() { local OUTPUT=${1} + local IPVERSION=${2:-"ipv4"} + + if [ "${IPVERSION}" == "ipv4" ] + then + SOCAT_MODE="UDP-LISTEN" + else + SOCAT_MODE="UDP6-LISTEN" + fi + # Just wait for 2 seconds timeout 2 ip netns exec "${NAMESPACE}" \ - socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" + socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" } -function validate_result() { +# Only validate that the message arrived properly +function validate_msg() { local TMPFILENAME="$1" - # TMPFILENAME will contain something like: - # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM - # key=value - # Check if the file exists if [ ! -f "$TMPFILENAME" ]; then echo "FAIL: File was not generated." >&2 @@ -175,17 +232,32 @@ function validate_result() { cat "${TMPFILENAME}" >&2 exit "${ksft_fail}" fi +} - if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then - echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" +# Validate the message and userdata +function validate_result() { + local TMPFILENAME="$1" + + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + + validate_msg "${TMPFILENAME}" + + # userdata is not supported on basic format target, + # thus, do not validate it. + if [ "${FORMAT}" != "basic" ]; + then + if ! 
grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi fi # Delete the file once it is validated, otherwise keep it # for debugging purposes rm "${TMPFILENAME}" - exit "${ksft_pass}" } function check_for_dependencies() { @@ -209,6 +281,11 @@ function check_for_dependencies() { exit "${ksft_skip}" fi + if [ ! -f /proc/net/if_inet6 ]; then + echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2 + exit "${ksft_skip}" + fi + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 exit "${ksft_skip}" @@ -224,8 +301,15 @@ function check_for_dependencies() { exit "${ksft_skip}" fi - if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then - echo "SKIP: IPs already in use. Skipping it" >&2 + REGEXP4="inet.*(${SRCIP4}|${DSTIP4})" + REGEXP6="inet.*(${SRCIP6}|${DSTIP6})" + if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then + echo "SKIP: IPv4s already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then + echo "SKIP: IPv6s already in use. 
Skipping it" >&2 exit "${ksft_skip}" fi } @@ -239,10 +323,41 @@ function check_for_taskset() { # This is necessary if running multiple tests in a row function pkill_socat() { - PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}" # socat runs under timeout(1), kill it if it is still alive # do not fail if socat doesn't exist anymore set +e - pkill -f "${PROCESS_NAME}" + pkill -f "${PROCESS_NAME4}" + pkill -f "${PROCESS_NAME6}" set -e } + +# Check if netconsole was compiled as a module, otherwise exit +function check_netconsole_module() { + if modinfo netconsole | grep filename: | grep -q builtin + then + echo "SKIP: netconsole should be compiled as a module" >&2 + exit "${ksft_skip}" + fi +} + +# A wrapper to translate protocol version to udp version +function wait_for_port() { + local NAMESPACE=${1} + local PORT=${2} + IP_VERSION=${3} + + if [ "${IP_VERSION}" == "ipv6" ] + then + PROTOCOL="udp6" + else + PROTOCOL="udp" + fi + + wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}" + # even after the port is open, let's wait 1 second before writing + # otherwise the packet could be missed, and the test will fail. Happens + # more frequently on IPv6 + sleep 1 +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 899b6892603f..d7505b933aef 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -51,7 +51,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do fi ${current_test}_setup_prepare - setup_wait $num_netifs + setup_wait_n $num_netifs # Update target in case occupancy of a certain resource changed # following the test setup. 
target=$(${current_test}_get_target "$should_fail") diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 482ebb744eba..7b98cdd0580d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -55,7 +55,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do continue fi ${current_test}_setup_prepare - setup_wait $num_netifs + setup_wait_n $num_netifs # Update target in case occupancy of a certain resource # changed following the test setup. target=$(${current_test}_get_target "$should_fail") diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py index 356bac46ba04..d05eddcad539 100755 --- a/tools/testing/selftests/drivers/net/napi_id.py +++ b/tools/testing/selftests/drivers/net/napi_id.py @@ -7,10 +7,10 @@ from lib.py import bkg, cmd, rand_port, NetNSEnter def test_napi_id(cfg) -> None: port = rand_port() - listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}" + listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}" with bkg(listen_cmd, ksft_wait=3) as server: - cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True) + cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True) ksft_eq(0, server.ret) diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c index eecd610c2109..7f49ca6c8637 100644 --- a/tools/testing/selftests/drivers/net/napi_id_helper.c +++ b/tools/testing/selftests/drivers/net/napi_id_helper.c @@ -7,41 +7,58 @@ #include <unistd.h> #include <arpa/inet.h> #include <sys/socket.h> +#include <netdb.h> #include "../../net/lib/ksft.h" int main(int argc, char *argv[]) { - struct sockaddr_in address; + struct sockaddr_storage address; + struct addrinfo *result; + struct 
addrinfo hints; unsigned int napi_id; - unsigned int port; + socklen_t addr_len; socklen_t optlen; char buf[1024]; int opt = 1; + int family; int server; int client; int ret; - server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE; + + ret = getaddrinfo(argv[1], argv[2], &hints, &result); + if (ret != 0) { + fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret)); + return 1; + } + + family = result->ai_family; + addr_len = result->ai_addrlen; + + server = socket(family, SOCK_STREAM, IPPROTO_TCP); if (server < 0) { perror("socket creation failed"); + freeaddrinfo(result); if (errno == EAFNOSUPPORT) return -1; return 1; } - port = atoi(argv[2]); - if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { perror("setsockopt"); + freeaddrinfo(result); return 1; } - address.sin_family = AF_INET; - inet_pton(AF_INET, argv[1], &address.sin_addr); - address.sin_port = htons(port); + memcpy(&address, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); - if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) { + if (bind(server, (struct sockaddr *)&address, addr_len) < 0) { perror("bind failed"); return 1; } diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index fe765da498e8..a3446b569976 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -32,21 +32,42 @@ check_for_dependencies echo "6 5" > /proc/sys/kernel/printk # Remove the namespace, interfaces and netconsole target on exit trap cleanup EXIT -# Create one namespace and two interfaces -set_network -# Create a dynamic target for netconsole -create_dynamic_target -# Set userdata "key" with the "value" value -set_user_data -# Listed for netconsole port inside the namespace and destination interface 
-listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" - -# Make sure the message was received in the dst part -# and exit -validate_result "${OUTPUT_FILE}" + +# Run the test twice, with different format modes +for FORMAT in "basic" "extended" +do + for IP_VERSION in "ipv6" "ipv4" + do + echo "Running with target mode: ${FORMAT} (${IP_VERSION})" + # Create one namespace and two interfaces + set_network "${IP_VERSION}" + # Create a dynamic target for netconsole + create_dynamic_target "${FORMAT}" + # Only set userdata for extended format + if [ "$FORMAT" == "extended" ] + then + # Set userdata "key" with the "value" value + set_user_data + fi + # Listed for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat + cleanup + echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2 + done +done + +trap - EXIT +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh new file mode 100755 index 000000000000..ad2fb8b1c463 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This is a selftest to test cmdline arguments on netconsole. 
+# It exercises loading of netconsole from cmdline instead of the dynamic +# reconfiguration. This includes parsing the long netconsole= line and all the +# flow through init_netconsole(). +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +check_netconsole_module + +modprobe netdevsim 2> /dev/null || true +rmmod netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +# check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace and network interfaces +trap do_cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create the command line for netconsole, with the configuration from the +# function above +CMDLINE="$(create_cmdline_str)" + +# Load the module, with the cmdline set +modprobe netconsole "${CMDLINE}" + +# Listed for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. 
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Make sure the message was received in the dst part +# and exit +validate_msg "${OUTPUT_FILE}" + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh index a737e377bf08..baf69031089e 100755 --- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh +++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh @@ -53,6 +53,17 @@ function set_release() { echo 1 > "${NETCONS_PATH}/userdata/release_enabled" } +# Enable the msgid to be appended to sysdata +function set_msgid() { + if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]] + then + echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + # Disable the sysdata cpu_nr feature function unset_cpu_nr() { echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" @@ -67,6 +78,10 @@ function unset_release() { echo 0 > "${NETCONS_PATH}/userdata/release_enabled" } +function unset_msgid() { + echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + # Test if MSG contains sysdata function validate_sysdata() { # OUTPUT_FILE will contain something like: @@ -74,6 +89,7 @@ function validate_sysdata() { # userdatakey=userdatavalue # cpu=X # taskname=<taskname> + # msgid=<id> # Echo is what this test uses to create the message. See runtest() # function @@ -104,6 +120,12 @@ function validate_sysdata() { exit "${ksft_fail}" fi + if ! 
grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + rm "${OUTPUT_FILE}" pkill_socat } @@ -155,6 +177,12 @@ function validate_no_sysdata() { exit "${ksft_fail}" fi + if grep -q "msgid=" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + rm "${OUTPUT_FILE}" } @@ -206,6 +234,7 @@ set_cpu_nr # Enable taskname to be appended to sysdata set_taskname set_release +set_msgid runtest # Make sure the message was received in the dst part # and exit @@ -235,6 +264,7 @@ MSG="Test #3 from CPU${CPU}" unset_cpu_nr unset_taskname unset_release +unset_msgid runtest # At this time, cpu= shouldn't be present in the msg validate_no_sysdata diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index b5ea2526f23c..030762b203d7 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -40,6 +40,8 @@ fw_flash_test() return fi + echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms + devlink dev flash $DL_HANDLE file $DUMMYFILE check_err $? "Failed to flash with status updates on" @@ -608,6 +610,46 @@ rate_attr_parent_check() check_err $? "Unexpected parent attr value $api_value != $parent" } +rate_attr_tc_bw_check() +{ + local handle=$1 + local tc_bw=$2 + local debug_file=$3 + + local tc_bw_str="" + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + tc_bw_str="$tc_bw_str $tc:$value" + done + tc_bw_str=${tc_bw_str# } + + rate_attr_set "$handle" tc-bw "$tc_bw_str" + check_err $? "Failed to set tc-bw values" + + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + local debug_value + debug_value=$(cat "$debug_file"/tc"${tc}"_bw) + check_err $? 
"Failed to read tc-bw value from debugfs for tc$tc" + [ "$debug_value" == "$value" ] + check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value" + done + + for bw in $tc_bw; do + local tc=${bw%%:*} + local expected_value=${bw##*:} + local api_value + api_value=$(rate_attr_get "$handle" tc_"$tc") + if [ "$api_value" = "null" ]; then + api_value=0 + fi + [ "$api_value" == "$expected_value" ] + check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value" + done +} + rate_node_add() { local handle=$1 @@ -649,6 +691,13 @@ rate_test() rate=$(($rate+100)) done + local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0" + for r_obj in $leafs + do + rate_attr_tc_bw_check "$r_obj" "$tc_bw" \ + "$DEBUGFS_DIR"/ports/"${r_obj##*/}" + done + local node1_name='group1' local node1="$DL_HANDLE/$node1_name" rate_node_add "$node1" @@ -666,6 +715,12 @@ rate_test() rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0" + rate_attr_tc_bw_check $node1 "$tc_bw" \ + "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}" + + rate_node_del "$node1" check_err $? 
"Failed to delete node $node1" local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 92c2f0376c08..4c859ecdad94 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -266,7 +266,6 @@ for port in 0 1; do echo $NSIM_ID > /sys/bus/netdevsim/new_device else echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` @@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net) port=0 echo $NSIM_ID > /sys/bus/netdevsim/new_device echo 0 > $NSIM_DEV_SYS/del_port -echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep echo 0 > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` msg="create VxLANs" -exp0=( 0 0 0 0 ) # sleep is longer than out wait -new_vxlan vxlan0 10000 $NSIM_NETDEV - -modprobe -r vxlan -modprobe -r udp_tunnel - -msg="remove tunnels" -exp0=( 0 0 0 0 ) -check_tables - -msg="create VxLANs" -exp0=( 0 0 0 0 ) # sleep is longer than out wait +exp0=( `mke 10000 1` 0 0 0 ) new_vxlan vxlan0 10000 $NSIM_NETDEV exp0=( 0 0 0 0 ) @@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -573,7 +557,6 @@ echo 1 > 
$NSIM_DEV_DFS/udp_ports_ipv4_only for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device echo 0 > $NSIM_DEV_SYS/del_port echo 0 > $NSIM_DEV_DFS/udp_ports_open_only -echo 1 > $NSIM_DEV_DFS/udp_ports_sleep echo 1 > $NSIM_DEV_DFS/udp_ports_shared old_netdevs=$(ls /sys/class/net) diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py new file mode 100755 index 000000000000..408bd54d6779 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netpoll_basic.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Author: Breno Leitao <leitao@debian.org> +""" + This test aims to evaluate the netpoll polling mechanism (as in + netpoll_poll_dev()). It presents a complex scenario where the network + attempts to send a packet but fails, prompting it to poll the NIC from within + the netpoll TX side. + + This has been a crucial path in netpoll that was previously untested. Jakub + suggested using a single RX/TX queue, pushing traffic to the NIC, and then + sending netpoll messages (via netconsole) to trigger the poll. 
+ + In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If + so, the test passes, otherwise it will be skipped. This test is very dependent on + the driver and environment, given we are trying to trigger a tricky scenario. +""" + +import errno +import logging +import os +import random +import string +import threading +import time +from typing import Optional + +from lib.py import ( + bpftrace, + CmdExitFailure, + defer, + ethtool, + GenerateTraffic, + ksft_exit, + ksft_pr, + ksft_run, + KsftFailEx, + KsftSkipEx, + NetDrvEpEnv, + KsftXfailEx, +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole" +NETCONS_REMOTE_PORT: int = 6666 +NETCONS_LOCAL_PORT: int = 1514 + +# Max number of netcons messages to send. Each iteration will setup +# netconsole and send MAX_WRITES messages +ITERATIONS: int = 20 +# Number of writes to /dev/kmsg per iteration +MAX_WRITES: int = 40 +# MAPS contains the information coming from bpftrace it will have only one +# key: "hits", which tells the number of times netpoll_poll_dev() was called +MAPS: dict[str, int] = {} +# Thread to run bpftrace in parallel +BPF_THREAD: Optional[threading.Thread] = None +# Time bpftrace will be running in parallel. +BPFTRACE_TIMEOUT: int = 10 + + +def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]: + """ + Read the ringsize using ethtool. This will be used to restore it after the test + """ + try: + ethtool_result = ethtool(f"-g {interface_name}", json=True)[0] + rxs = ethtool_result["rx"] + txs = ethtool_result["tx"] + except (KeyError, IndexError) as exception: + raise KsftSkipEx( + f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them." 
+ ) from exception + + return rxs, txs + + +def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool: + """Try to the number of RX and TX ringsize.""" + rxs = ring_size[0] + txs = ring_size[1] + + logging.debug("Setting ring size to %d/%d", rxs, txs) + try: + ethtool(f"-G {interface_name} rx {rxs} tx {txs}") + except CmdExitFailure: + # This might fail on real device, retry with a higher value, + # worst case, keep it as it is. + return False + + return True + + +def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]: + """Read the number of RX, TX and combined queues using ethtool""" + + try: + ethtool_result = ethtool(f"-l {interface_name}", json=True)[0] + rxq = ethtool_result.get("rx", -1) + txq = ethtool_result.get("tx", -1) + combined = ethtool_result.get("combined", -1) + + except IndexError as exception: + raise KsftSkipEx( + f"Failed to read queues numbers: {exception}. Not going to mess with them." + ) from exception + + return rxq, txq, combined + + +def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None: + """Set the number of RX, TX and combined queues using ethtool""" + rxq, txq, combined = queues + + cmdline = f"-L {interface_name}" + + if rxq != -1: + cmdline += f" rx {rxq}" + if txq != -1: + cmdline += f" tx {txq}" + if combined != -1: + cmdline += f" combined {combined}" + + logging.debug("calling: ethtool %s", cmdline) + + try: + ethtool(cmdline) + except CmdExitFailure as exception: + raise KsftSkipEx( + f"Failed to configure RX/TX queues: {exception}. Ethtool not available?" 
+ ) from exception + + +def netcons_generate_random_target_name() -> str: + """Generate a random target name starting with 'netcons'""" + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8)) + return f"netcons_{random_suffix}" + + +def netcons_create_target( + config_data: dict[str, str], + target_name: str, +) -> None: + """Create a netconsole dynamic target against the interfaces""" + logging.debug("Using netconsole name: %s", target_name) + try: + os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True) + logging.debug( + "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name + ) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise KsftFailEx( + f"Failed to create netconsole target directory: {exception}" + ) from exception + + try: + for key, value in config_data.items(): + path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}" + logging.debug("Writing %s to %s", key, path) + with open(path, "w", encoding="utf-8") as file: + # Always convert to string to write to file + file.write(str(value)) + + # Read all configuration values for debugging purposes + for debug_key in config_data.keys(): + with open( + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}", + "r", + encoding="utf-8", + ) as file: + content = file.read() + logging.debug( + "%s/%s/%s : %s", + NETCONSOLE_CONFIGFS_PATH, + target_name, + debug_key, + content.strip(), + ) + + except Exception as exception: + raise KsftFailEx( + f"Failed to configure netconsole target: {exception}" + ) from exception + + +def netcons_configure_target( + cfg: NetDrvEpEnv, interface_name: str, target_name: str +) -> None: + """Configure netconsole on the interface with the given target name""" + config_data = { + "extended": "1", + "dev_name": interface_name, + "local_port": NETCONS_LOCAL_PORT, + "remote_port": NETCONS_REMOTE_PORT, + "local_ip": cfg.addr, + "remote_ip": cfg.remote_addr, + "remote_mac": "00:00:00:00:00:00", # Not 
important for this test + "enabled": "1", + } + + netcons_create_target(config_data, target_name) + logging.debug( + "Created netconsole target: %s on interface %s", target_name, interface_name + ) + + +def netcons_delete_target(name: str) -> None: + """Delete a netconsole dynamic target""" + target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}" + try: + if os.path.exists(target_path): + os.rmdir(target_path) + except OSError as exception: + raise KsftFailEx( + f"Failed to delete netconsole target: {exception}" + ) from exception + + +def netcons_load_module() -> None: + """Try to load the netconsole module""" + os.system("modprobe netconsole") + + +def bpftrace_call() -> None: + """Call bpftrace to find how many times netpoll_poll_dev() is called. + Output is saved in the global variable `maps`""" + + # This is going to update the global variable, that will be seen by the + # main function + global MAPS # pylint: disable=W0603 + + # This will be passed to bpftrace as in bpftrace -e "expr" + expr = "kprobe:netpoll_poll_dev { @hits = count(); }" + + MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True) + logging.debug("BPFtrace output: %s", MAPS) + + +def bpftrace_start(): + """Start a thread to call `call_bpf` in a parallel thread""" + global BPF_THREAD # pylint: disable=W0603 + + BPF_THREAD = threading.Thread(target=bpftrace_call) + BPF_THREAD.start() + if not BPF_THREAD.is_alive(): + raise KsftSkipEx("BPFtrace thread is not alive. 
Skipping test") + + +def bpftrace_stop() -> None: + """Stop the bpftrace thread""" + if BPF_THREAD: + BPF_THREAD.join() + + +def bpftrace_any_hit(join: bool) -> bool: + """Check if netpoll_poll_dev() was called by checking the global variable `maps`""" + if not BPF_THREAD: + raise KsftFailEx("BPFtrace didn't start") + + if BPF_THREAD.is_alive(): + if join: + # Wait for bpftrace to finish + BPF_THREAD.join() + else: + # bpftrace is still running, so, we will not check the result yet + return False + + logging.debug("MAPS coming from bpftrace = %s", MAPS) + if "hits" not in MAPS.keys(): + raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}") + + logging.debug("Got a total of %d hits", MAPS["hits"]) + return MAPS["hits"] > 0 + + +def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + # Start bpftrace in parallel, so, it is watching + # netpoll_poll_dev() while we are sending netconsole messages + bpftrace_start() + defer(bpftrace_stop) + + do_netpoll_flush(cfg, ifname, target_name) + + if bpftrace_any_hit(join=True): + ksft_pr("netpoll_poll_dev() was called. Success") + return + + raise KsftXfailEx("netpoll_poll_dev() was not called during the test...") + + +def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + netcons_configure_target(cfg, ifname, target_name) + retry = 0 + + for i in range(int(ITERATIONS)): + if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False): + # bpftrace is done, stop sending messages + break + + msg = f"netcons test #{i}" + with open("/dev/kmsg", "w", encoding="utf-8") as kmsg: + for j in range(MAX_WRITES): + try: + kmsg.write(f"{msg}-{j}\n") + except OSError as exception: + # in some cases, kmsg can be busy, so, we will retry + time.sleep(1) + retry += 1 + if retry < 5: + logging.info("Failed to write to kmsg. 
Retrying") + # Just retry a few times + continue + raise KsftFailEx( + f"Failed to write to kmsg: {exception}" + ) from exception + + netcons_delete_target(target_name) + netcons_configure_target(cfg, ifname, target_name) + # If we sleep here, we will have a better chance of triggering + # This number is based on a few tests I ran while developing this test + time.sleep(0.4) + + +def configure_network(ifname: str) -> None: + """Configure ring size and queue numbers""" + + # Set defined queues to 1 to force congestion + prev_queues = ethtool_get_queues_cnt(ifname) + logging.debug("RX/TX/combined queues: %s", prev_queues) + # Only set the queues to 1 if they exists in the device. I.e, they are > 0 + ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues)) + defer(ethtool_set_queues_cnt, ifname, prev_queues) + + # Try to set the ring size to some low value. + # Do not fail if the hardware do not accepted desired values + prev_ring_size = ethtool_get_ringsize(ifname) + for size in [(1, 1), (128, 128), (256, 256)]: + if ethtool_set_ringsize(ifname, size): + # hardware accepted the desired ringsize + logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size) + break + defer(ethtool_set_ringsize, ifname, prev_ring_size) + + +def test_netpoll(cfg: NetDrvEpEnv) -> None: + """ + Test netpoll by sending traffic to the interface and then sending + netconsole messages to trigger a poll + """ + + ifname = cfg.ifname + configure_network(ifname) + target_name = netcons_generate_random_target_name() + traffic = None + + try: + traffic = GenerateTraffic(cfg) + do_netpoll_flush_monitored(cfg, ifname, target_name) + finally: + if traffic: + traffic.stop() + + # Revert RX/TX queues + netcons_delete_target(target_name) + + +def test_check_dependencies() -> None: + """Check if the dependencies are met""" + if not os.path.exists(NETCONSOLE_CONFIGFS_PATH): + raise KsftSkipEx( + f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. 
CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301 + ) + + +def main() -> None: + """Main function to run the test""" + netcons_load_module() + test_check_dependencies() + with NetDrvEpEnv(__file__) as cfg: + ksft_run( + [test_netpoll], + args=(cfg,), + ) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index e0f114612c1a..da3623c5e8a9 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -30,7 +30,7 @@ def _test_v6(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) def _test_tcp(cfg) -> None: - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) port = rand_port() listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index efcc1e10575b..c2bb5d3f1ca1 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -1,12 +1,16 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +""" +Tests related to standard netdevice statistics. +""" + import errno import subprocess import time from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises -from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import KsftSkipEx, KsftFailEx from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv @@ -18,13 +22,16 @@ rtnl = RtnlFamily() def check_pause(cfg) -> None: - global ethnl + """ + Check that drivers which support Pause config also report standard + pause stats. 
+ """ try: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("pause not supported by the device") + raise KsftSkipEx("pause not supported by the device") from e raise data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex, @@ -33,13 +40,16 @@ def check_pause(cfg) -> None: def check_fec(cfg) -> None: - global ethnl + """ + Check that drivers which support FEC config also report standard + FEC stats. + """ try: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("FEC not supported by the device") + raise KsftSkipEx("FEC not supported by the device") from e raise data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, @@ -48,15 +58,17 @@ def check_fec(cfg) -> None: def pkt_byte_sum(cfg) -> None: - global netfam, rtnl + """ + Check that qstat and interface stats match in value. + """ def get_qstat(test): - global netfam stats = netfam.qstats_get({}, dump=True) if stats: for qs in stats: if qs["ifindex"]== test.ifindex: return qs + return None qstat = get_qstat(cfg) if qstat is None: @@ -77,15 +89,14 @@ def pkt_byte_sum(cfg) -> None: for _ in range(10): rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] if stat_cmp(rtstat, qstat) < 0: - raise Exception("RTNL stats are lower, fetched later") + raise KsftFailEx("RTNL stats are lower, fetched later") qstat = get_qstat(cfg) if stat_cmp(rtstat, qstat) > 0: - raise Exception("Qstats are lower, fetched later") + raise KsftFailEx("Qstats are lower, fetched later") def qstat_by_ifindex(cfg) -> None: - global netfam - global rtnl + """ Qstats Netlink API tests - querying by ifindex. 
""" # Construct a map ifindex -> [dump, by-index, dump] ifindexes = {} @@ -93,7 +104,7 @@ def qstat_by_ifindex(cfg) -> None: for entry in stats: ifindexes[entry['ifindex']] = [entry, None, None] - for ifindex in ifindexes.keys(): + for ifindex in ifindexes: entry = netfam.qstats_get({"ifindex": ifindex}, dump=True) ksft_eq(len(entry), 1) ifindexes[entry[0]['ifindex']][1] = entry[0] @@ -145,7 +156,7 @@ def qstat_by_ifindex(cfg) -> None: # Try to get stats for lowest unused ifindex but not 0 devs = rtnl.getlink({}, dump=True) - all_ifindexes = set([dev["ifi-index"] for dev in devs]) + all_ifindexes = set(dev["ifi-index"] for dev in devs) lowest = 2 while lowest in all_ifindexes: lowest += 1 @@ -158,18 +169,20 @@ def qstat_by_ifindex(cfg) -> None: @ksft_disruptive def check_down(cfg) -> None: + """ Test statistics (interface and qstat) are not impacted by ifdown """ + try: qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftSkipEx("qstats not supported by the device") + raise KsftSkipEx("qstats not supported by the device") from e raise ip(f"link set dev {cfg.dev['ifname']} down") defer(ip, f"link set dev {cfg.dev['ifname']} up") qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - for k, v in qstat.items(): + for k in qstat: ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") # exercise per-queue API to make sure that "device down" state @@ -263,6 +276,8 @@ def procfs_downup_hammer(cfg) -> None: def main() -> None: + """ Ksft boiler plate main """ + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, check_down, procfs_hammer, procfs_downup_hammer], diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py new file mode 100755 index 000000000000..1dd8bf3bf6c9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -0,0 +1,658 @@ 
+#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +This file contains tests to verify native XDP support in network drivers. +The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib` +directory, with each test focusing on a specific aspect of XDP functionality. +""" +import random +import string +from dataclasses import dataclass +from enum import Enum + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr +from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ip, bpftool, defer + + +class TestConfig(Enum): + """Enum for XDP configuration options.""" + MODE = 0 # Configures the BPF program for a specific test + PORT = 1 # Port configuration to communicate with the remote host + ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking + ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension + + +class XDPAction(Enum): + """Enum for XDP actions.""" + PASS = 0 # Pass the packet up to the stack + DROP = 1 # Drop the packet + TX = 2 # Route the packet to the remote host + TAIL_ADJST = 3 # Adjust the tail of the packet + HEAD_ADJST = 4 # Adjust the head of the packet + + +class XDPStats(Enum): + """Enum for XDP statistics.""" + RX = 0 # Count of valid packets received for testing + PASS = 1 # Count of packets passed up to the stack + DROP = 2 # Count of packets dropped + TX = 3 # Count of incoming packets routed to the remote host + ABORT = 4 # Count of packets that were aborted + + +@dataclass +class BPFProgInfo: + """Data class to store information about a BPF program.""" + name: str # Name of the BPF program + file: str # BPF program object file + xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags") + mtu: int = 1500 # Maximum Transmission Unit, default is 1500 + + +def _exchg_udp(cfg, port, test_string): + """ + Exchanges UDP packets between a local and remote host using the 
socat tool. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + test_string: String that the remote host will send. + + Returns: + The string received by the test host. + """ + cfg.require_cmd("socat", remote=True) + + rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_udp_cmd, exit_wait=True) as nc: + wait_port_listen(port, proto="udp") + cmd(tx_udp_cmd, host=cfg.remote, shell=True) + + return nc.stdout.strip() + + +def _test_udp(cfg, port, size=256): + """ + Tests UDP packet exchange between a local and remote host. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + size: The length of the test string to be exchanged, default is 256 characters. + + Returns: + bool: True if the received string matches the sent string, False otherwise. + """ + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size)) + recvd_str = _exchg_udp(cfg, port, test_str) + + return recvd_str == test_str + + +def _load_xdp_prog(cfg, bpf_info): + """ + Loads an XDP program onto a network interface. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + + Returns: + dict: A dictionary containing the XDP program ID, name, and associated map IDs. 
+ """ + abs_path = cfg.net_lib_dir / bpf_info.file + prog_info = {} + + cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + + cmd( + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}", + shell=True + ) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + + xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] + prog_info["id"] = xdp_info["xdp"]["prog"]["id"] + prog_info["name"] = xdp_info["xdp"]["prog"]["name"] + prog_id = prog_info["id"] + + map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"] + prog_info["maps"] = {} + for map_id in map_ids: + name = bpftool(f"map show id {map_id}", json=True)["name"] + prog_info["maps"][name] = map_id + + return prog_info + + +def format_hex_bytes(value): + """ + Helper function that converts an integer into a formatted hexadecimal byte string. + + Args: + value: An integer representing the number to be converted. + + Returns: + A string representing hexadecimal equivalent of value, with bytes separated by spaces. + """ + hex_str = value.to_bytes(4, byteorder='little', signed=True) + return ' '.join(f'{byte:02x}' for byte in hex_str) + + +def _set_xdp_map(map_name, key, value): + """ + Updates an XDP map with a given key-value pair using bpftool. + + Args: + map_name: The name of the XDP map to update. + key: The key to update in the map, formatted as a hexadecimal string. + value: The value to associate with the key, formatted as a hexadecimal string. + """ + key_formatted = format_hex_bytes(key) + value_formatted = format_hex_bytes(value) + bpftool( + f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}" + ) + + +def _get_stats(xdp_map_id): + """ + Retrieves and formats statistics from an XDP map. + + Args: + xdp_map_id: The ID of the XDP map from which to retrieve statistics. 
+ + Returns: + A dictionary containing formatted packet statistics for various XDP actions. + The keys are based on the XDPStats Enum values. + + Raises: + KsftFailEx: If the stats retrieval fails. + """ + stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True) + if not stats_dump: + raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}") + + stats_formatted = {} + for key in range(0, 5): + val = stats_dump[key]["formatted"]["value"] + if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value: + stats_formatted[XDPStats.RX.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value: + stats_formatted[XDPStats.PASS.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value: + stats_formatted[XDPStats.DROP.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value: + stats_formatted[XDPStats.TX.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value: + stats_formatted[XDPStats.ABORT.value] = val + + return stats_formatted + + +def _test_pass(cfg, bpf_info, msg_sz): + """ + Tests the XDP_PASS action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch") + + +def test_xdp_native_pass_sb(cfg): + """ + Tests the XDP_PASS action for single buffer case. + + Args: + cfg: Configuration object containing network settings. 
+ """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_pass(cfg, bpf_info, 256) + + +def test_xdp_native_pass_mb(cfg): + """ + Tests the XDP_PASS action for a multi-buff size. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_pass(cfg, bpf_info, 8000) + + +def _test_drop(cfg, bpf_info, msg_sz): + """ + Tests the XDP_DROP action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch") + + +def test_xdp_native_drop_sb(cfg): + """ + Tests the XDP_DROP action for a signle-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_drop(cfg, bpf_info, 256) + + +def test_xdp_native_drop_mb(cfg): + """ + Tests the XDP_DROP action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_drop(cfg, bpf_info, 8000) + + +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. 
+ """ + cfg.require_cmd("socat", remote=True) + + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000)) + rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) + cmd(tx_udp, host=cfg.remote, shell=True) + + stats = _get_stats(prog_info['maps']['map_xdp_stats']) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch") + + +def _validate_res(res, offset_lst, pkt_sz_lst): + """ + Validates the result of a test. + + Args: + res: The result of the test, which should be a dictionary with a "status" key. + + Raises: + KsftFailEx: If the test fails to pass any combination of offset and packet size. + """ + if "status" not in res: + raise KsftFailEx("Missing 'status' key in result dictionary") + + # Validate that not a single case was successful + if res["status"] == "fail": + if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]: + raise KsftFailEx(f"{res['reason']}") + + # Get the previous offset and packet size to report the successful run + tmp_idx = offset_lst.index(res["offset"]) + prev_offset = offset_lst[tmp_idx - 1] + if tmp_idx == 0: + tmp_idx = pkt_sz_lst.index(res["pkt_sz"]) + prev_pkt_sz = pkt_sz_lst[tmp_idx - 1] + else: + prev_pkt_sz = res["pkt_sz"] + + # Use these values for error reporting + ksft_pr( + f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. " + f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. 
" + f"Reason: {res['reason']}" + ) + + +def _check_for_failures(recvd_str, stats): + """ + Checks for common failures while adjusting headroom or tailroom. + + Args: + recvd_str: The string received from the remote host after sending a test string. + stats: A dictionary containing formatted packet statistics for various XDP actions. + + Returns: + str: A string describing the failure reason if a failure is detected, otherwise None. + """ + + # Any adjustment failure result in an abort hence, we track this counter + if stats[XDPStats.ABORT.value] != 0: + return "Adjustment failed" + + # Since we are using aggregate stats for a single test across all offsets and packet sizes + # we can't use RX stats only to track data exchange failure without taking a previous + # snapshot. An easier way is to simply check for non-zero length of received string. + if len(recvd_str) == 0: + return "Data exchange failed" + + # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run + if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]: + return "RX stats mismatch" + + return None + + +def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst): + """ + Tests the XDP tail adjustment functionality. + + This function loads the appropriate XDP program based on the provided + program name and configures the XDP map for tail adjustment. It then + validates the tail adjustment by sending and receiving UDP packets + with specified packet sizes and offsets. + + Args: + cfg: Configuration object containing network settings. + prog: Name of the XDP program to load. + pkt_sz_lst: List of packet sizes to test. + offset_lst: List of offsets to validate support for tail adjustment. + + Returns: + dict: A dictionary with test status and failure details if applicable. 
+ """ + port = rand_port() + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + prog_info = _load_xdp_prog(cfg, bpf_info) + + # Configure the XDP map for tail adjustment + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + for offset in offset_lst: + tag = format(random.randint(65, 90), "02x") + + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + if offset > 0: + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + + for pkt_sz in pkt_sz_lst: + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + recvd_str = _exchg_udp(cfg, port, test_str) + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + failure = _check_for_failures(recvd_str, stats) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz, + } + + # Validate data content based on offset direction + expected_data = None + if offset > 0: + expected_data = test_str + (offset * chr(int(tag, 16))) + else: + expected_data = test_str[0:pkt_sz + offset] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz, + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_tail_grow_data(cfg): + """ + Tests the XDP tail adjustment by growing packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [1, 16, 32, 64, 128, 256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_tail_shrnk_data(cfg): + """ + Tests the XDP tail adjustment by shrinking packet data. + + Args: + cfg: Configuration object containing network settings. 
+ """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def get_hds_thresh(cfg): + """ + Retrieves the header data split (HDS) threshold for a network interface. + + Args: + cfg: Configuration object containing network settings. + + Returns: + The HDS threshold value. If the threshold is not supported or an error occurs, + a default value of 1500 is returned. + """ + netnl = cfg.netnl + hds_thresh = 1500 + + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' not in rings: + ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}') + return hds_thresh + hds_thresh = rings['hds-thresh'] + except NlError as e: + ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}") + + return hds_thresh + + +def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst): + """ + Tests the XDP head adjustment action for a multi-buffer case. + + Args: + cfg: Configuration object containing network settings. + netnl: Network namespace or link object (not used in this function). + + This function sets up the packet size and offset lists, then performs + the head adjustment test by sending and receiving UDP packets. + """ + cfg.require_cmd("socat", remote=True) + + prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000)) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + hds_thresh = get_hds_thresh(cfg) + for offset in offset_lst: + for pkt_sz in pkt_sz_lst: + # The "head" buffer must contain at least the Ethernet header + # after we eat into it. We send large-enough packets, but if HDS + # is enabled head will only contain headers. 
Don't try to eat + # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14) + l2_cut_off = 28 if cfg.addr_ipver == 4 else 48 + if pkt_sz > hds_thresh and offset > l2_cut_off: + ksft_pr( + f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and " + f"offset {offset} > {l2_cut_off}" + ) + return {"status": "pass"} + + test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + tag = format(random.randint(65, 90), '02x') + + _set_xdp_map("map_xdp_setup", + TestConfig.ADJST_OFFSET.value, + offset) + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + + recvd_str = _exchg_udp(cfg, port, test_str) + + # Check for failures around adjustment and data exchange + failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats'])) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz + } + + # Validate data content based on offset direction + expected_data = None + if offset < 0: + expected_data = chr(int(tag, 16)) * (0 - offset) + test_str + else: + expected_data = test_str[offset:] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_head_grow_data(cfg): + """ + Tests the XDP headroom growth support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully extended for given packet + sizes and offsets. 
+ """ + pkt_sz_lst = [512, 1024, 2048] + + # Negative values result in headroom shrinking, resulting in growing of payload + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_head_shrnk_data(cfg): + """ + Tests the XDP headroom shrinking support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully shrunk for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Positive values result in headroom growing, resulting in shrinking of payload + offset_lst = [16, 32, 64, 128, 256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def main(): + """ + Main function to execute the XDP tests. + + This function runs a series of tests to validate the XDP support for + both the single and multi-buffer. It uses the NetDrvEpEnv context + manager to manage the network driver environment and the ksft_run + function to execute the tests. 
+ """ + with NetDrvEpEnv(__file__) as cfg: + cfg.netnl = EthtoolFamily() + ksft_run( + [ + test_xdp_native_pass_sb, + test_xdp_native_pass_mb, + test_xdp_native_drop_sb, + test_xdp_native_drop_mb, + test_xdp_native_tx_mb, + test_xdp_native_adjst_tail_grow_data, + test_xdp_native_adjst_tail_shrnk_data, + test_xdp_native_adjst_head_grow_data, + test_xdp_native_adjst_head_shrnk_data, + ], + args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index 73f6c6fcecab..2506f464811b 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -16,6 +16,18 @@ ocnt=`cat enabled_functions | wc -l` echo "f:myevent1 $PLACE" >> dynamic_events +echo "f:myevent2 $PLACE%return" >> dynamic_events + +# add another event +echo "f:myevent3 $PLACE2" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +grep -q myevent3 dynamic_events +test -d events/fprobes/myevent1 +test -d events/fprobes/myevent2 + +echo 1 > events/fprobes/myevent1/enable # Make sure the event is attached and is the only one grep -q $PLACE enabled_functions cnt=`cat enabled_functions | wc -l` @@ -23,29 +35,22 @@ if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi -echo "f:myevent2 $PLACE%return" >> dynamic_events - +echo 1 > events/fprobes/myevent2/enable # It should till be the only attached function cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi -# add another event -echo "f:myevent3 $PLACE2" >> dynamic_events - +echo 1 > events/fprobes/myevent3/enable +# If the function is different, the attached function should be increased grep -q $PLACE2 enabled_functions cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi -grep -q myevent1 dynamic_events -grep -q myevent2 
dynamic_events -grep -q myevent3 dynamic_events -test -d events/fprobes/myevent1 -test -d events/fprobes/myevent2 - +echo 0 > events/fprobes/myevent2/enable echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events @@ -57,6 +62,7 @@ if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi +echo 0 > events/fprobes/enable echo > dynamic_events # Should have none left @@ -67,12 +73,14 @@ fi echo "f:myevent4 $PLACE" >> dynamic_events +echo 1 > events/fprobes/myevent4/enable # Should only have one enabled cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi +echo 0 > events/fprobes/enable echo > dynamic_events # Should have none left diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index b1f40857307d..38c51158adf8 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -135,6 +135,7 @@ CONFIG_NET_EMATCH=y CONFIG_NETFILTER_NETLINK_LOG=y CONFIG_NETFILTER_NETLINK_QUEUE=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y CONFIG_NETFILTER_XT_MATCH_BPF=y CONFIG_NETFILTER_XT_MATCH_COMMENT=y diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 38b95998e1e6..f6fe7a07a0a2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -59,6 +59,7 @@ TEST_PROGS_x86 += x86/nx_huge_pages_test.sh TEST_GEN_PROGS_COMMON = demand_paging_test TEST_GEN_PROGS_COMMON += dirty_log_test TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += irqfd_test TEST_GEN_PROGS_COMMON += kvm_binary_stats_test TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus TEST_GEN_PROGS_COMMON += kvm_page_table_test @@ -134,6 +135,7 @@ TEST_GEN_PROGS_x86 += x86/amx_test TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += 
x86/aperfmperf_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test TEST_GEN_PROGS_x86 += dirty_log_perf_test @@ -156,7 +158,7 @@ TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls -TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/external_aborts TEST_GEN_PROGS_arm64 += arm64/page_fault_test TEST_GEN_PROGS_arm64 += arm64/psci_test TEST_GEN_PROGS_arm64 += arm64/set_id_regs diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index da7196fd1b23..c9de66537ec3 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -596,11 +596,8 @@ int main(int argc, char *argv[]) if (ret) return ret; } else { - page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - __TEST_REQUIRE(page_idle_fd >= 0, - "Couldn't open /sys/kernel/mm/page_idle/bitmap. 
" - "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); - + page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR, + "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); close(page_idle_fd); puts("Using page_idle for aging"); diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index acb2cb596332..cf8fb67104f1 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void) static int test_migrate_vcpu(unsigned int vcpu_idx) { int ret; - cpu_set_t cpuset; uint32_t new_pcpu = test_get_pcpu(); - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); - ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx], - sizeof(cpuset), &cpuset); + ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu); /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index 4e71740a098b..ce74d069cb7b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -862,25 +862,6 @@ static uint32_t next_pcpu(void) return next; } -static void migrate_self(uint32_t new_pcpu) -{ - int ret; - cpu_set_t cpuset; - pthread_t thread; - - thread = pthread_self(); - - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - - pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); - - ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); - - TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", - new_pcpu, ret); -} - static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, enum arch_timer timer) { @@ -907,7 +888,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) sched_yield(); break; case USERSPACE_MIGRATE_SELF: 
- migrate_self(next_pcpu()); + pin_self_to_cpu(next_pcpu()); break; default: break; @@ -919,7 +900,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) struct ucall uc; /* Start on CPU 0 */ - migrate_self(0); + pin_self_to_cpu(0); while (true) { vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index c7fb55c9135b..e34963956fbc 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -140,7 +140,7 @@ static void enable_os_lock(void) static void enable_monitor_debug_exceptions(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); @@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, static void install_ss(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c new file mode 100644 index 000000000000..062bf84cced1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * external_abort - Tests for userspace external abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL +#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed) + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, 
void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static bool vcpu_has_ras(struct kvm_vcpu *vcpu) +{ + u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0); +} + +static bool guest_has_ras(void) +{ + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1)); +} + +static void vcpu_inject_serror(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.serror_pending = true; + if (vcpu_has_ras(vcpu)) { + events.exception.serror_has_esr = true; + events.exception.serror_esr = EXPECTED_SERROR_ISS; + } + + vcpu_events_set(vcpu, &events); +} + +static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + if (uc.cmd == cmd) + return; + + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_DONE); +} + +static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_SYNC); +} + +extern char test_mmio_abort_insn; + +static noinline void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO 
emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. 
+ */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void unexpected_serror_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Took unexpected SError exception"); +} + +static void test_serror_masked_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + isb(); + + GUEST_DONE(); +} + +static void test_serror_masked(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_serror_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR); + if (guest_has_ras()) + GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS); + + GUEST_DONE(); +} + +static void test_serror_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_emulated_guest(void) +{ + GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); + + 
local_serror_enable(); + GUEST_SYNC(0); + local_serror_disable(); + + GUEST_FAIL("Should've taken unmasked SError exception"); +} + +static void test_serror_emulated(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_mmio_ease_guest(void) +{ + sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE); + isb(); + + test_mmio_abort_guest(); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_ease(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest, + unexpected_dabt_handler); + struct kvm_run *run = vcpu->run; + u64 pfr1; + + pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) { + pr_debug("Skipping %s\n", __func__); + return; + } + + /* + * SCTLR2_ELx.EASE changes the exception vector to the SError vector but + * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx). 
+ */ + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); + test_serror(); + test_serror_masked(); + test_serror_emulated(); + test_mmio_ease(); +} diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d01798b6b3b4..011fad95dd02 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -15,6 +15,12 @@ #include "test_util.h" #include "processor.h" +#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + struct feature_id_reg { __u64 reg; __u64 id_reg; @@ -22,37 +28,43 @@ struct feature_id_reg { __u64 feat_min; }; -static struct feature_id_reg feat_id_regs[] = { - { - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 0, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 - }, - { - ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 +#define FEAT(id, f, v) \ + .id_reg = SYS_REG(id), \ + .feat_shift = id ## _ ## f ## _SHIFT, \ + .feat_min = id ## _ ## f ## _ ## v + +#define REG_FEAT(r, id, f, 
v) \ + { \ + .reg = SYS_REG(r), \ + FEAT(id, f, v) \ } + +static struct feature_id_reg feat_id_regs[] = { + REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP), + REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), + REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), }; bool filter_reg(__u64 reg) @@ -469,6 +481,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ @@ -686,6 +699,62 @@ static __u64 pauth_generic_regs[] = { ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ }; +static __u64 el2_regs[] = { + SYS_REG(VPIDR_EL2), + SYS_REG(VMPIDR_EL2), + 
SYS_REG(SCTLR_EL2), + SYS_REG(ACTLR_EL2), + SYS_REG(HCR_EL2), + SYS_REG(MDCR_EL2), + SYS_REG(CPTR_EL2), + SYS_REG(HSTR_EL2), + SYS_REG(HFGRTR_EL2), + SYS_REG(HFGWTR_EL2), + SYS_REG(HFGITR_EL2), + SYS_REG(HACR_EL2), + SYS_REG(ZCR_EL2), + SYS_REG(HCRX_EL2), + SYS_REG(TTBR0_EL2), + SYS_REG(TTBR1_EL2), + SYS_REG(TCR_EL2), + SYS_REG(TCR2_EL2), + SYS_REG(VTTBR_EL2), + SYS_REG(VTCR_EL2), + SYS_REG(VNCR_EL2), + SYS_REG(HDFGRTR2_EL2), + SYS_REG(HDFGWTR2_EL2), + SYS_REG(HFGRTR2_EL2), + SYS_REG(HFGWTR2_EL2), + SYS_REG(HDFGRTR_EL2), + SYS_REG(HDFGWTR_EL2), + SYS_REG(HAFGRTR_EL2), + SYS_REG(HFGITR2_EL2), + SYS_REG(SPSR_EL2), + SYS_REG(ELR_EL2), + SYS_REG(AFSR0_EL2), + SYS_REG(AFSR1_EL2), + SYS_REG(ESR_EL2), + SYS_REG(FAR_EL2), + SYS_REG(HPFAR_EL2), + SYS_REG(MAIR_EL2), + SYS_REG(PIRE0_EL2), + SYS_REG(PIR_EL2), + SYS_REG(POR_EL2), + SYS_REG(AMAIR_EL2), + SYS_REG(VBAR_EL2), + SYS_REG(CONTEXTIDR_EL2), + SYS_REG(TPIDR_EL2), + SYS_REG(CNTVOFF_EL2), + SYS_REG(CNTHCTL_EL2), + SYS_REG(CNTHP_CTL_EL2), + SYS_REG(CNTHP_CVAL_EL2), + SYS_REG(CNTHV_CTL_EL2), + SYS_REG(CNTHV_CVAL_EL2), + SYS_REG(SP_EL2), + SYS_REG(VDISR_EL2), + SYS_REG(VSESR_EL2), +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -712,6 +781,14 @@ static __u64 pauth_generic_regs[] = { .regs = pauth_generic_regs, \ .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } +#define EL2_SUBLIST \ + { \ + .name = "EL2", \ + .capability = KVM_CAP_ARM_EL2, \ + .feature = KVM_ARM_VCPU_HAS_EL2, \ + .regs = el2_regs, \ + .regs_n = ARRAY_SIZE(el2_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -761,6 +838,65 @@ static struct vcpu_reg_list pauth_pmu_config = { }, }; +static struct vcpu_reg_list el2_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static 
struct vcpu_reg_list el2_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -768,5 +904,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &sve_pmu_config, &pauth_config, &pauth_pmu_config, + + &el2_vregs_config, + &el2_vregs_pmu_config, + &el2_sve_config, + &el2_sve_pmu_config, + &el2_pauth_config, + &el2_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c deleted file mode 100644 index 8b7a80a51b1c..000000000000 --- a/tools/testing/selftests/kvm/arm64/mmio_abort.c +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * mmio_abort - Tests for userspace MMIO abort injection - * - * Copyright (c) 2024 Google LLC - */ -#include "processor.h" -#include "test_util.h" - -#define MMIO_ADDR 0x8000000ULL - -static u64 expected_abort_pc; - -static void expect_sea_handler(struct ex_regs *regs) -{ - u64 esr = read_sysreg(esr_el1); - - GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); - GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); - GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); - - GUEST_DONE(); -} - -static void unexpected_dabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); -} - -static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, - handler_fn 
dabt_handler) -{ - struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); - - virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); - - return vm; -} - -static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events = {}; - - events.exception.ext_dabt_pending = true; - vcpu_events_set(vcpu, &events); -} - -static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} - -extern char test_mmio_abort_insn; - -static void test_mmio_abort_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); - - asm volatile("test_mmio_abort_insn:\n\t" - "ldr x0, [%0]\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that KVM doesn't complete MMIO emulation when userspace has made an - * external abort pending for the instruction. 
- */ -static void test_mmio_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); - TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); - TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); - TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -extern char test_mmio_nisv_insn; - -static void test_mmio_nisv_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); - - asm volatile("test_mmio_nisv_insn:\n\t" - "ldr x0, [%0], #8\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace - * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. - */ -static void test_mmio_nisv(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - unexpected_dabt_handler); - - TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); - TEST_ASSERT_EQ(errno, ENOSYS); - - kvm_vm_free(vm); -} - -/* - * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA - * reaches the guest. 
- */ -static void test_mmio_nisv_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); - TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -int main(void) -{ - test_mmio_abort(); - test_mmio_nisv(); - test_mmio_nisv_abort(); -} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8f422bfdfcb9..d3bf9204409c 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -139,6 +139,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { }; static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), @@ -187,6 +188,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), @@ -217,6 +226,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), 
TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1), TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), }; @@ -774,8 +784,8 @@ int main(void) ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 + - MPAM_IDREG_TEST + MTE_IDREG_TEST; + ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - + ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; ksft_set_plan(test_cnt); diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..a8e0f46bc0ab 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -9,17 +9,18 @@ #include <asm/kvm.h> #include <asm/kvm_para.h> +#include <arm64/gic_v3.h> + #include "test_util.h" #include "kvm_util.h" #include "processor.h" #include "vgic.h" +#include "gic_v3.h" #define NR_VCPUS 4 #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) -#define GICR_TYPER 0x8 - #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -675,6 +676,44 @@ static void test_v3_its_region(void) vm_gic_destroy(&v); } +static void test_v3_nassgicap(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + bool has_nassgicap; + struct vm_gic vm; + u32 typer2; + int ret; + + vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap; + + typer2 |= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + if (has_nassgicap) + TEST_ASSERT(!ret, 
KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret)); + else + TEST_ASSERT(ret && errno == EINVAL, + "Enabled nASSGIcap even though it's unavailable"); + + typer2 &= ~GICD_TYPER2_nASSGIcap; + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + typer2 ^= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + TEST_ASSERT(ret && errno == EBUSY, + "Changed nASSGIcap after initializing the VGIC"); + + vm_gic_destroy(&vm); +} + /* * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). */ @@ -715,6 +754,220 @@ int test_kvm_device(uint32_t gic_dev_type) return 0; } +struct sr_def { + const char *name; + u32 encoding; +}; + +#define PACK_SR(r) \ + ((sys_reg_Op0(r) << 14) | \ + (sys_reg_Op1(r) << 11) | \ + (sys_reg_CRn(r) << 7) | \ + (sys_reg_CRm(r) << 3) | \ + (sys_reg_Op2(r))) + +#define SR(r) \ + { \ + .name = #r, \ + .encoding = r, \ + } + +static const struct sr_def sysregs_el1[] = { + SR(SYS_ICC_PMR_EL1), + SR(SYS_ICC_BPR0_EL1), + SR(SYS_ICC_AP0R0_EL1), + SR(SYS_ICC_AP0R1_EL1), + SR(SYS_ICC_AP0R2_EL1), + SR(SYS_ICC_AP0R3_EL1), + SR(SYS_ICC_AP1R0_EL1), + SR(SYS_ICC_AP1R1_EL1), + SR(SYS_ICC_AP1R2_EL1), + SR(SYS_ICC_AP1R3_EL1), + SR(SYS_ICC_BPR1_EL1), + SR(SYS_ICC_CTLR_EL1), + SR(SYS_ICC_SRE_EL1), + SR(SYS_ICC_IGRPEN0_EL1), + SR(SYS_ICC_IGRPEN1_EL1), +}; + +static const struct sr_def sysregs_el2[] = { + SR(SYS_ICH_AP0R0_EL2), + SR(SYS_ICH_AP0R1_EL2), + SR(SYS_ICH_AP0R2_EL2), + SR(SYS_ICH_AP0R3_EL2), + SR(SYS_ICH_AP1R0_EL2), + SR(SYS_ICH_AP1R1_EL2), + SR(SYS_ICH_AP1R2_EL2), + SR(SYS_ICH_AP1R3_EL2), + SR(SYS_ICH_HCR_EL2), + SR(SYS_ICC_SRE_EL2), + SR(SYS_ICH_VTR_EL2), + SR(SYS_ICH_VMCR_EL2), + SR(SYS_ICH_LR0_EL2), + SR(SYS_ICH_LR1_EL2), + SR(SYS_ICH_LR2_EL2), + SR(SYS_ICH_LR3_EL2), + SR(SYS_ICH_LR4_EL2), + SR(SYS_ICH_LR5_EL2), + SR(SYS_ICH_LR6_EL2), + 
SR(SYS_ICH_LR7_EL2), + SR(SYS_ICH_LR8_EL2), + SR(SYS_ICH_LR9_EL2), + SR(SYS_ICH_LR10_EL2), + SR(SYS_ICH_LR11_EL2), + SR(SYS_ICH_LR12_EL2), + SR(SYS_ICH_LR13_EL2), + SR(SYS_ICH_LR14_EL2), + SR(SYS_ICH_LR15_EL2), +}; + +static void test_sysreg_array(int gic, const struct sr_def *sr, int nr, + int (*check)(int, const struct sr_def *, const char *)) +{ + for (int i = 0; i < nr; i++) { + u64 val; + u64 attr; + int ret; + + /* Assume MPIDR_EL1.Aff*=0 */ + attr = PACK_SR(sr[i].encoding); + + /* + * The API is braindead. A register can be advertised as + * available, and yet not be readable or writable. + * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and + * ICH_APnR{1,2,3}_EL2 do follow suit for consistency. + * + * On the bright side, no known HW is implementing more than + * 5 bits of priority, so we're safe. Sort of... + */ + ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr); + TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name); + + /* Check that we can write back what we read */ + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name); + ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name); + } +} + +static u8 get_ctlr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICC_CTLR_EL1), &val); + TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable"); + + pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICC_AP0R1_EL1: + case SYS_ICC_AP1R1_EL1: + if (get_ctlr_pribits(gic) >= 6) + return -EINVAL; + break; + case 
SYS_ICC_AP0R2_EL1: + case SYS_ICC_AP0R3_EL1: + case SYS_ICC_AP1R2_EL1: + case SYS_ICC_AP1R3_EL1: + if (get_ctlr_pribits(gic) == 7) + return -EINVAL; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static u8 get_vtr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICH_VTR_EL2), &val); + TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable"); + + pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICH_AP0R1_EL2: + case SYS_ICH_AP1R1_EL2: + if (get_vtr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICH_AP0R2_EL2: + case SYS_ICH_AP0R3_EL2: + case SYS_ICH_AP1R2_EL2: + case SYS_ICH_AP1R3_EL2: + if (get_vtr_pribits(gic) == 7) + return -EINVAL; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static void test_v3_sysregs(void) +{ + struct kvm_vcpu_init init = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u32 feat = 0; + int gic; + + if (kvm_check_cap(KVM_CAP_ARM_EL2)) + feat |= BIT(KVM_ARM_VCPU_HAS_EL2); + + vm = vm_create(1); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= feat; + + vcpu = aarch64_vcpu_add(vm, 0, &init, NULL); + TEST_ASSERT(vcpu, "Can't create a vcpu?"); + + gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + TEST_ASSERT(gic >= 0, "No GIC???"); + + kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs); + if (feat) + test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs); + else + pr_info("SKIP EL2 registers, not available\n"); + + close(gic); + 
kvm_vm_free(vm); +} + void run_tests(uint32_t gic_dev_type) { test_vcpus_then_vgic(gic_dev_type); @@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type) test_v3_last_bit_single_rdist(); test_v3_redist_ipa_range_check_at_vcpu_run(); test_v3_its_region(); + test_v3_sysregs(); + test_v3_nassgicap(); } } diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index f4ac28d53747..a09dd423c2d7 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -620,18 +620,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, * that no actual interrupt was injected for those cases. */ - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); - } + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + fd[f] = kvm_new_eventfd(); for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - struct kvm_irqfd irqfd = { - .fd = fd[f], - .gsi = i - MIN_SPI, - }; assert(i <= (uint64_t)UINT_MAX); - vm_ioctl(vm, KVM_IRQFD, &irqfd); + kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]); } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config index 8835fed09e9f..96d874b239eb 100644 --- a/tools/testing/selftests/kvm/config +++ b/tools/testing/selftests/kvm/config @@ -1,5 +1,6 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=y CONFIG_KVM_AMD=y +CONFIG_EVENTFD=y CONFIG_USERFAULTFD=y CONFIG_IDLE_PAGE_TRACKING=y diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index b0fc0f945766..255fed769a8a 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -254,6 +254,16 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +static 
inline void local_serror_enable(void) +{ + asm volatile("msr daifclr, #4" : : : "memory"); +} + +static inline void local_serror_disable(void) +{ + asm volatile("msr daifset, #4" : : : "memory"); +} + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bee65ca08721..23a506d7eca3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -18,8 +18,11 @@ #include <asm/atomic.h> #include <asm/kvm.h> +#include <sys/eventfd.h> #include <sys/ioctl.h> +#include <pthread.h> + #include "kvm_util_arch.h" #include "kvm_util_types.h" #include "sparsebit.h" @@ -253,6 +256,7 @@ struct vm_guest_mode_params { }; extern const struct vm_guest_mode_params vm_guest_mode_params[]; +int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); @@ -502,6 +506,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) return fd; } +static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + struct kvm_irqfd irqfd = { + .fd = eventfd, + .gsi = gsi, + .flags = flags, + .resamplefd = -1, + }; + + return __vm_ioctl(vm, KVM_IRQFD, &irqfd); +} + +static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + int ret = __kvm_irqfd(vm, gsi, eventfd, flags); + + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm); +} + +static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, 0); +} + +static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +static inline int kvm_new_eventfd(void) +{ + int fd = eventfd(0, 0); + + TEST_ASSERT(fd >= 0, 
__KVM_SYSCALL_ERROR("eventfd()", fd)); + return fd; +} + static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) { ssize_t ret; @@ -1013,7 +1056,34 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); void kvm_set_files_rlimit(uint32_t nr_vcpus); -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +int __pin_task_to_cpu(pthread_t task, int cpu); + +static inline void pin_task_to_cpu(pthread_t task, int cpu) +{ + int r; + + r = __pin_task_to_cpu(task, cpu); + TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu); +} + +static inline int pin_task_to_any_cpu(pthread_t task) +{ + int cpu = sched_getcpu(); + + pin_task_to_cpu(task, cpu); + return cpu; +} + +static inline void pin_self_to_cpu(int cpu) +{ + pin_task_to_cpu(pthread_self(), cpu); +} + +static inline int pin_self_to_any_cpu(void) +{ + return pin_task_to_any_cpu(pthread_self()); +} + void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int nr_vcpus); diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index b11b5a53ebd5..2efb05c2f2fb 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1150,7 +1150,6 @@ do { \ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); void kvm_init_vm_address_properties(struct kvm_vm *vm); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); struct ex_regs { uint64_t rax, rcx, rdx, rbx; @@ -1325,6 +1324,11 @@ static inline bool kvm_is_forced_emulation_enabled(void) return !!get_kvm_param_integer("force_emulation_prefix"); } +static inline bool kvm_is_unrestricted_guest_enabled(void) +{ + return get_kvm_intel_param_bool("unrestricted_guest"); +} + uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); diff 
--git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c new file mode 100644 index 000000000000..7c301b4c7005 --- /dev/null +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" + +static struct kvm_vm *vm1; +static struct kvm_vm *vm2; +static int __eventfd; +static bool done; + +/* + * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when + * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task + * GSI base to avoid false failures due to cross-task de-assign, i.e. so that + * the secondary doesn't de-assign the primary's eventfd and cause assign to + * unexpectedly succeed on the primary. + */ +#define GSI_BASE_PRIMARY 0x20 +#define GSI_BASE_SECONDARY 0x30 + +static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd) +{ + int r, i; + + /* + * The secondary task can encounter EBADF since the primary can close + * the eventfd at any time. And because the primary can recreate the + * eventfd, at the same fd in the file table, the secondary can also + * encounter "unexpected" success, e.g. if the close+recreate happens + * between the first and second assignments. The secondary's role is + * mostly to antagonize KVM, not to detect bugs. + */ + for (i = 0; i < 2; i++) { + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0); + TEST_ASSERT(!r || errno == EBUSY || errno == EBADF, + "Wanted success, EBUSY, or EBADF, r = %d, errno = %d", + r, errno); + + /* De-assign should succeed unless the eventfd was closed. 
*/ + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + TEST_ASSERT(!r || errno == EBADF, + "De-assign should succeed unless the fd was closed"); + } +} + +static void *secondary_irqfd_juggler(void *ign) +{ + while (!READ_ONCE(done)) { + juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd)); + juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd)); + } + + return NULL; +} + +static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) +{ + int r1, r2; + + /* + * At least one of the assigns should fail. KVM disallows assigning a + * single eventfd to multiple GSIs (or VMs), so it's possible that both + * assignments can fail, too. + */ + r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0); + TEST_ASSERT(!r1 || errno == EBUSY, + "Wanted success or EBUSY, r = %d, errno = %d", r1, errno); + + r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0); + TEST_ASSERT(r1 || (r2 && errno == EBUSY), + "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d", + r1, r2, errno); + + /* + * De-assign should always succeed, even if the corresponding assign + * failed. + */ + kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +int main(int argc, char *argv[]) +{ + pthread_t racing_thread; + int r, i; + + /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */ + vm1 = vm_create(1); + vm2 = vm_create(1); + + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + kvm_irqfd(vm1, 10, __eventfd, 0); + + r = __kvm_irqfd(vm1, 11, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + r = __kvm_irqfd(vm2, 12, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + /* + * De-assign all eventfds, along with multiple eventfds that were never + * assigned. KVM's ABI is that de-assign is allowed so long as the + * eventfd itself is valid. 
+ */ + kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + + close(__eventfd); + + pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2); + + for (i = 0; i < 10000; i++) { + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + juggle_eventfd_primary(vm1, __eventfd); + juggle_eventfd_primary(vm2, __eventfd); + close(__eventfd); + } + + WRITE_ONCE(done, true); + pthread_join(racing_thread, NULL); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a055343a7bf7..c3f5142b0a54 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -26,15 +26,27 @@ static uint32_t last_guest_seed; static int vcpu_mmap_sz(void); -int open_path_or_exit(const char *path, int flags) +int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); - TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); + if (fd < 0) + goto error; return fd; + +error: + if (errno == EACCES || errno == ENOENT) + ksft_exit_skip("- Cannot open '%s': %s. %s\n", + path, strerror(errno), + errno == EACCES ? "Root required?" 
: enoent_help); + TEST_FAIL("Failed to open '%s'", path); +} + +int open_path_or_exit(const char *path, int flags) +{ + return __open_path_or_exit(path, flags, ""); } /* @@ -48,7 +60,7 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?"); } int open_kvm_dev_path_or_exit(void) @@ -64,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param, ssize_t bytes_read; int fd, r; + /* Verify KVM is loaded, to provide a more helpful SKIP message. */ + close(open_kvm_dev_path_or_exit()); + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", module_name, param); TEST_ASSERT(r < path_size, @@ -605,15 +620,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } -void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +int __pin_task_to_cpu(pthread_t task, int cpu) { - cpu_set_t mask; - int r; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); - CPU_ZERO(&mask); - CPU_SET(pcpu, &mask); - r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); + return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -667,7 +681,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 2. Check if the main worker needs to be pinned. */ if (cpu) { - kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask)); cpu = strtok(NULL, delim); } @@ -1716,7 +1730,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) /* Create an interrupt controller chip for the specified VM. 
*/ void vm_create_irqchip(struct kvm_vm *vm) { - vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + int r; + + /* + * Allocate a fully in-kernel IRQ chip by default, but fall back to a + * split model (x86 only) if that fails (KVM x86 allows compiling out + * support for KVM_CREATE_IRQCHIP). + */ + r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP)) + vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24); + else + TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm); vm->has_irqchip = true; } diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 313277486a1d..557c0a0a5658 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -265,7 +265,7 @@ static void *vcpu_thread_main(void *data) int vcpu_idx = vcpu->vcpu_idx; if (memstress_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); + pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index cfed9d26cc71..a99188f87a38 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -116,7 +116,7 @@ * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * @@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. 
All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. */ diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index a92dc1dad085..d4c19ac885a9 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1264,16 +1264,6 @@ done: return min(max_gfn, ht_gfn - 1); } -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - /* Ensure that a KVM vendor-specific module is loaded. */ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); - - return get_kvm_intel_param_bool("unrestricted_guest"); -} - void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c new file mode 100644 index 000000000000..8b15a13df939 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for KVM_X86_DISABLE_EXITS_APERFMPERF + * + * Copyright (C) 2025, Google LLC. + * + * Test the ability to disable VM-exits for rdmsr of IA32_APERF and + * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs + * return the host's values. + * + * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs. 
+ */ + +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <asm/msr-index.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" +#include "vmx.h" + +#define NUM_ITERATIONS 10000 + +static int open_dev_msr(int cpu) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu); + return open_path_or_exit(path, O_RDONLY); +} + +static uint64_t read_dev_msr(int msr_fd, uint32_t msr) +{ + uint64_t data; + ssize_t rc; + + rc = pread(msr_fd, &data, sizeof(data), msr); + TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr); + + return data; +} + +static void guest_read_aperf_mperf(void) +{ + int i; + + for (i = 0; i < NUM_ITERATIONS; i++) + GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF)); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_read_aperf_mperf(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set + * in the VMCS by prepare_vmcs()), as MSR exiting is mandatory on Intel. 
+	 */
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+		vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS);
+
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+	GUEST_ASSERT(!vmlaunch());
+}
+
+/*
+ * L1 guest entry point: run the APERF/MPERF read loop, then run it again from
+ * L2 when the vCPU advertises SVM or VMX.  Without nested support, L1 signals
+ * completion itself.
+ */
+static void guest_code(void *nested_test_data)
+{
+	guest_read_aperf_mperf();
+
+	if (this_cpu_has(X86_FEATURE_SVM))
+		l1_svm_code(nested_test_data);
+	else if (this_cpu_has(X86_FEATURE_VMX))
+		l1_vmx_code(nested_test_data);
+	else
+		GUEST_DONE();
+
+	TEST_FAIL("L2 should have signaled 'done'");
+}
+
+/*
+ * Guest code for the default-configuration sanity check: a read of either
+ * IA32_APERF or IA32_MPERF is expected to raise #GP.
+ */
+static void guest_no_aperfmperf(void)
+{
+	uint64_t msr_val;
+	uint8_t vector;
+
+	vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+	GUEST_ASSERT(vector == GP_VECTOR);
+
+	/* Probe MPERF as well; reading APERF a second time proves nothing new. */
+	vector = rdmsr_safe(MSR_IA32_MPERF, &msr_val);
+	GUEST_ASSERT(vector == GP_VECTOR);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+	uint64_t host_aperf_before, host_mperf_before;
+	vm_vaddr_t nested_test_data_gva;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	int msr_fd, cpu, i;
+
+	/* Sanity check that APERF/MPERF are unsupported by default. */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf);
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+	kvm_vm_free(vm);
+
+	cpu = pin_self_to_any_cpu();
+
+	msr_fd = open_dev_msr(cpu);
+
+	/*
+	 * This test requires a non-standard VM initialization, because
+	 * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
+	 * a VCPU has been created. 
+ */ + vm = vm_create(1); + + TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) & + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (!has_nested) + nested_test_data_gva = NONCANONICAL; + else if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) { + uint64_t host_aperf_after, host_mperf_after; + uint64_t guest_aperf, guest_mperf; + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + guest_aperf = uc.args[0]; + guest_mperf = uc.args[1]; + + host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + TEST_ASSERT(host_aperf_before < guest_aperf, + "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_aperf_before, guest_aperf); + TEST_ASSERT(guest_aperf < host_aperf_after, + "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_aperf, host_aperf_after); + TEST_ASSERT(host_mperf_before < guest_mperf, + "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_mperf_before, guest_mperf); + TEST_ASSERT(guest_mperf < host_mperf_after, + "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_mperf, host_mperf_after); + + host_aperf_before = host_aperf_after; + host_mperf_before = host_mperf_after; + + break; + } + } + TEST_FAIL("Didn't receive UCALL_DONE\n"); +done: + kvm_vm_free(vm); + close(msr_fd); + + return 0; +} diff --git 
a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 32b2794b78fe..8463a9956410 100644 --- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void) data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); + /* Access the MSRs again to ensure KVM has disabled interception.*/ + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data != MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data != MSR_GS_BASE); + GUEST_DONE(); } @@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); run_guest_then_process_ucall_done(vcpu); } diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index 3fd6eceab46f..2cae86d9d5e2 100644 --- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); + TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled()); vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index 287829f850f7..23909b501ac2 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -547,15 +547,9 @@ int main(int argc, char *argv[]) int irq_fd[2] = { -1, -1 }; if 
(do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); + irq_fd[0] = kvm_new_eventfd(); + irq_fd[1] = kvm_new_eventfd(); - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_evtchn_tests = do_eventfd_tests = false; - } - - if (do_eventfd_tests) { irq_routes.info.nr = 2; irq_routes.entries[0].gsi = 32; @@ -572,15 +566,8 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, KVM_IRQFD, &ifd); + kvm_assign_irqfd(vm, 32, irq_fd[0]); + kvm_assign_irqfd(vm, 33, irq_fd[1]); struct sigaction sa = { }; sa.sa_handler = handle_alrm; diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c6dd2a335cf4..47c293c2962f 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -34,6 +34,7 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello +scm_inq scm_pidfd scm_rights sk_bind_sendto_listen diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 332f387615d7..b31a71f2b372 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -41,6 +41,7 @@ TEST_PROGS += netns-name.sh TEST_PROGS += link_netns.py TEST_PROGS += nl_netdev.py TEST_PROGS += rtnetlink.py +TEST_PROGS += rtnetlink_notification.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -62,6 +63,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS += netns-sysctl.sh +TEST_PROGS += netdev-l2addr.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy 
reuseport_addr_any @@ -99,6 +101,7 @@ TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh +TEST_PROGS += test_neigh.sh TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh @@ -112,6 +115,8 @@ TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh +TEST_PROGS += broadcast_pmtu.sh +TEST_PROGS += ipv6_force_forwarding.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 50584479540b..a4b61c6d0290 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect +TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c new file mode 100644 index 000000000000..9d22561e7b8f --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "../../kselftest_harness.h" + +#define NR_CHUNKS 100 +#define MSG_LEN 256 + +struct scm_inq { + struct cmsghdr cmsghdr; + int inq; +}; + +FIXTURE(scm_inq) +{ + int fd[2]; +}; + +FIXTURE_VARIANT(scm_inq) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(scm_inq, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(scm_inq) +{ + int err; + + err 
= socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(scm_inq) +{ + close(self->fd[0]); + close(self->fd[1]); +} + +static void send_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char buf[MSG_LEN] = {}; + int i, ret; + + for (i = 0; i < NR_CHUNKS; i++) { + ret = send(self->fd[0], buf, sizeof(buf), 0); + ASSERT_EQ(sizeof(buf), ret); + } +} + +static void recv_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + struct msghdr msg = {}; + struct iovec iov = {}; + struct scm_inq cmsg; + char buf[MSG_LEN]; + int i, ret; + int inq; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = &cmsg; + msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + for (i = 0; i < NR_CHUNKS; i++) { + memset(buf, 0, sizeof(buf)); + memset(&cmsg, 0, sizeof(cmsg)); + + ret = recvmsg(self->fd[1], &msg, 0); + ASSERT_EQ(MSG_LEN, ret); + ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); + ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + ret = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, ret); + ASSERT_EQ(cmsg.inq, inq); + } +} + +TEST_F(scm_inq, basic) +{ + int err, inq; + + err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int)); + if (variant->type != SOCK_STREAM) { + ASSERT_EQ(-ENOPROTOOPT, -errno); + return; + } + + ASSERT_EQ(0, err); + + err = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, err); + ASSERT_EQ(0, inq); + + send_chunks(_metadata, self); + recv_chunks(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile new file mode 100644 index 000000000000..2546c45e42f7 --- /dev/null +++ b/tools/testing/selftests/net/bench/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_MODS_DIR 
:= page_pool + +TEST_PROGS += test_bench_page_pool.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile new file mode 100644 index 000000000000..0549a16ba275 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/Makefile @@ -0,0 +1,17 @@ +BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +obj-m += bench_page_pool.o +bench_page_pool-y += bench_page_pool_simple.o time_bench.o + +all: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c new file mode 100644 index 000000000000..cb6468adbda4 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmark module for page_pool. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/interrupt.h> +#include <linux/limits.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <net/page_pool/helpers.h> + +#include "time_bench.h" + +static int verbose = 1; +#define MY_POOL_SIZE 1024 + +/* Makes tests selectable. Useful for perf-record to analyze a single test. 
+ * Hint: Bash shells support writing binary numbers like: $((2#101010))
+ *
+ *  # modprobe bench_page_pool run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Each enumerator is the bit position in run_flags that enables that test */
+enum benchmark_bit {
+	bit_run_bench_baseline,
+	bit_run_bench_no_softirq01,
+	bit_run_bench_no_softirq02,
+	bit_run_bench_no_softirq03,
+};
+
+#define bit(b)		(1 << (b))
+#define enabled(b)	((run_flags & (bit(b))))
+
+/* Note: time_bench is limited to U32_MAX loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
+
+/* Timing at the nanosecond level, we need to know the overhead
+ * introduced by the for loop itself.
+ *
+ * Returns the number of iterations executed.
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	int i;
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		loops_cnt++;
+		barrier(); /* keep the compiler from optimizing the loop away */
+	}
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+/* Baseline: cost of one atomic_inc() per loop iteration.
+ *
+ * Returns the final counter value, i.e. the number of increments performed.
+ */
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	atomic_t cnt;
+	int i;
+
+	atomic_set(&cnt, 0);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		atomic_inc(&cnt);
+		barrier(); /* keep the compiler from optimizing the loop away */
+	}
+	loops_cnt = atomic_read(&cnt);
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+/* The ptr_ring in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking. 
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	spinlock_t lock;
+	int i;
+
+	spin_lock_init(&lock);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		spin_lock(&lock);
+		loops_cnt++;
+		barrier(); /* keep the compiler from optimizing the loop away */
+		spin_unlock(&lock);
+	}
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+/* Helper for filling some pages into the ptr_ring: allocate @elems pages
+ * from @pp, then hand them all back through page_pool_put_page().
+ *
+ * Best effort: if the scratch array or an individual page cannot be
+ * allocated, that part of the priming is skipped.
+ */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+	/* GFP_ATOMIC needed when running under softirq */
+	gfp_t gfp_mask = GFP_ATOMIC;
+	struct page **array;
+	int i;
+
+	array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+	if (!array)
+		return;
+
+	for (i = 0; i < elems; i++)
+		array[i] = page_pool_alloc_pages(pp, gfp_mask);
+	for (i = 0; i < elems; i++) {
+		/* An atomic allocation may have failed; only return real pages */
+		if (array[i])
+			page_pool_put_page(pp, array[i], -1, false);
+	}
+
+	kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Create a page_pool, prime it, then time rec->loops alloc+return cycles
+ * using the return path selected by @type. @func is only used as a prefix
+ * in log messages.
+ *
+ * Returns the number of completed iterations; 0 when the pool could not be
+ * created, which time_bench_loop() reports as a failed run.
+ *
+ * Depends on the compiler optimizing this function (the BUILD_BUG() branch
+ * must be eliminated at compile time).
+ */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+				enum test_type type, const char *func)
+{
+	uint64_t loops_cnt = 0;
+	gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+	int i, err;
+
+	struct page_pool *pp;
+	struct page *page;
+
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.flags = 0,
+		.pool_size = MY_POOL_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = NULL, /* Only use for DMA mapping */
+		.dma_dir = DMA_BIDIRECTIONAL,
+	};
+
+	pp = page_pool_create(&pp_params);
+	if (IS_ERR(pp)) {
+		err = PTR_ERR(pp);
+		pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+		/* No pool exists: an ERR_PTR must not reach page_pool_destroy() */
+		return 0;
+	}
+	pp_fill_ptr_ring(pp, 64);
+
+	if (in_serving_softirq())
+		pr_warn("%s(): in_serving_softirq fast-path\n", func);
+	else
+		pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		/* Common fast-path alloc that depends on in_serving_softirq() */
+		page = page_pool_alloc_pages(pp, gfp_mask);
+		if (!page)
+			break;
+		loops_cnt++;
+		barrier(); /* keep the compiler from optimizing the loop away */
+
+		/* The benchmark's purpose is to test different return paths.
+		 * Compiler should inline optimize other function calls out
+		 */
+		if (type == type_fast_path) {
+			/* Fast-path recycling e.g. XDP_DROP use-case */
+			page_pool_recycle_direct(pp, page);
+
+		} else if (type == type_ptr_ring) {
+			/* Normal return path */
+			page_pool_put_page(pp, page, -1, false);
+
+		} else if (type == type_page_allocator) {
+			/* Test the case where pages are not recycled, but
+			 * instead returned to the system's page allocator
+			 */
+			get_page(page); /* cause no-recycling */
+			page_pool_put_page(pp, page, -1, false);
+			put_page(page);
+		} else {
+			BUILD_BUG();
+		}
+	}
+	time_bench_stop(rec, loops_cnt);
+	page_pool_destroy(pp);
+	return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+					    void *data)
+{
+	return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+					   void *data)
+{
+	return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+				       void *data)
+{
+	return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+	uint32_t nr_loops = loops;
+
+	/* Baseline tests */
+	if (enabled(bit_run_bench_baseline)) {
+		time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+				time_bench_for_loop);
+		time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+				time_bench_atomic_inc);
+		time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+	}
+
+	/* This test cannot activate correct code path, due to no-softirq ctx */
+	if (enabled(bit_run_bench_no_softirq01))
+		time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+				time_bench_page_pool01_fast_path);
+	if (enabled(bit_run_bench_no_softirq02))
+		
time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL, + time_bench_page_pool02_ptr_ring); + if (enabled(bit_run_bench_no_softirq03)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL, + time_bench_page_pool03_slow); + + return 0; +} + +static int __init bench_page_pool_simple_module_init(void) +{ + if (verbose) + pr_info("Loaded\n"); + + if (loops > U32_MAX) { + pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops, + U32_MAX); + return -ECHRNG; + } + + run_benchmark_tests(); + + return 0; +} +module_init(bench_page_pool_simple_module_init); + +static void __exit bench_page_pool_simple_module_exit(void) +{ + if (verbose) + pr_info("Unloaded\n"); +} +module_exit(bench_page_pool_simple_module_exit); + +MODULE_DESCRIPTION("Benchmark of page_pool simple cases"); +MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c new file mode 100644 index 000000000000..073bb36ec5f2 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/time.h> + +#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */ + +/* For concurrency testing */ +#include <linux/completion.h> +#include <linux/sched.h> +#include <linux/workqueue.h> +#include <linux/kthread.h> + +#include "time_bench.h" + +static int verbose = 1; + +/** TSC (Time-Stamp Counter) based ** + * See: linux/time_bench.h + * tsc_start_clock() and tsc_stop_clock() + */ + +/** Wall-clock based ** + */ + +/** PMU (Performance Monitor Unit) based ** + */ +#define PERF_FORMAT \ + (PERF_FORMAT_GROUP | PERF_FORMAT_ID | 
PERF_FORMAT_TOTAL_TIME_ENABLED | \ + PERF_FORMAT_TOTAL_TIME_RUNNING) + +struct raw_perf_event { + uint64_t config; /* event */ + uint64_t config1; /* umask */ + struct perf_event *save; + char *desc; +}; + +/* if HT is enable a maximum of 4 events (5 if one is instructions + * retired can be specified, if HT is disabled a maximum of 8 (9 if + * one is instructions retired) can be specified. + * + * From Table 19-1. Architectural Performance Events + * Architectures Software Developer’s Manual Volume 3: System Programming + * Guide + */ +struct raw_perf_event perf_events[] = { + { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" }, + { 0xc0, 0x00, NULL, "Instruction Retired" } +}; + +#define NUM_EVTS (ARRAY_SIZE(perf_events)) + +/* WARNING: PMU config is currently broken! + */ +bool time_bench_PMU_config(bool enable) +{ + int i; + struct perf_event_attr perf_conf; + struct perf_event *perf_event; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + pr_info("DEBUG: cpu:%d\n", cpu); + preempt_enable(); + + memset(&perf_conf, 0, sizeof(struct perf_event_attr)); + perf_conf.type = PERF_TYPE_RAW; + perf_conf.size = sizeof(struct perf_event_attr); + perf_conf.read_format = PERF_FORMAT; + perf_conf.pinned = 1; + perf_conf.exclude_user = 1; /* No userspace events */ + perf_conf.exclude_kernel = 0; /* Only kernel events */ + + for (i = 0; i < NUM_EVTS; i++) { + perf_conf.disabled = enable; + //perf_conf.disabled = (i == 0) ? 
1 : 0;
+		perf_conf.config = perf_events[i].config;
+		perf_conf.config1 = perf_events[i].config1;
+		if (verbose)
+			pr_info("%s() enable PMU counter: %s\n",
+				__func__, perf_events[i].desc);
+		perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+						 NULL /* task */,
+						 NULL /* overflow_handler*/,
+						 NULL /* context */);
+		/* Failure is reported as an ERR_PTR, never as NULL */
+		if (!IS_ERR(perf_event)) {
+			perf_events[i].save = perf_event;
+			pr_info("%s():DEBUG perf_event success\n", __func__);
+
+			perf_event_enable(perf_event);
+		} else {
+			pr_info("%s():DEBUG perf_event failed (%ld)\n",
+				__func__, PTR_ERR(perf_event));
+		}
+	}
+
+	return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record.
+ *
+ * Returns false when the record cannot be evaluated (too few or too many
+ * invocations, or a zero-length measurement interval).
+ */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+	uint64_t ns_per_call_tmp_rem = 0;
+	uint32_t ns_per_call_remainder = 0;
+	uint64_t pmc_ipc_tmp_rem = 0;
+	uint32_t pmc_ipc_remainder = 0;
+	uint32_t pmc_ipc_div = 0;
+	uint32_t invoked_cnt_precision = 0;
+	uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+	if (rec->flags & TIME_BENCH_LOOP) {
+		if (rec->invoked_cnt < 1000) {
+			pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+			       rec->invoked_cnt);
+			return false;
+		}
+		if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+			/* div_u64_rem() can only support div with 32bit*/
+			pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+			       rec->invoked_cnt);
+			return false;
+		}
+		invoked_cnt = (uint32_t)rec->invoked_cnt;
+	}
+
+	/* TSC (Time-Stamp Counter) records */
+	if (rec->flags & TIME_BENCH_TSC) {
+		rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+		if (rec->tsc_interval == 0) {
+			pr_err("ABORT: timing took ZERO TSC time\n");
+			return false;
+		}
+		/* Calculate stats */
+		if (rec->flags & TIME_BENCH_LOOP)
+			rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
+		else
+			rec->tsc_cycles = rec->tsc_interval;
+	}
+
+	/* Wall-clock time calc */
+	if (rec->flags & TIME_BENCH_WALLCLOCK) {
+		rec->time_start = rec->ts_start.tv_nsec +
+				  (NANOSEC_PER_SEC * 
rec->ts_start.tv_sec); + rec->time_stop = rec->ts_stop.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_stop.tv_sec); + rec->time_interval = rec->time_stop - rec->time_start; + if (rec->time_interval == 0) { + pr_err("ABORT: timing took ZERO wallclock time\n"); + return false; + } + /* Calculate stats */ + /*** Division in kernel it tricky ***/ + /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */ + /* remainder only correct because NANOSEC_PER_SEC is 10^9 */ + rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC, + &rec->time_sec_remainder); + //TODO: use existing struct timespec records instead of div? + + if (rec->flags & TIME_BENCH_LOOP) { + /*** Division in kernel it tricky ***/ + /* Orig: ns = ((double)time_interval / invoked_cnt); */ + /* First get quotient */ + rec->ns_per_call_quotient = + div_u64_rem(rec->time_interval, invoked_cnt, + &ns_per_call_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + ns_per_call_tmp_rem = ns_per_call_remainder; + invoked_cnt_precision = invoked_cnt / 1000; + if (invoked_cnt_precision > 0) { + rec->ns_per_call_decimal = + div_u64_rem(ns_per_call_tmp_rem, + invoked_cnt_precision, + &ns_per_call_remainder); + } + } + } + + /* Performance Monitor Unit (PMU) counters */ + if (rec->flags & TIME_BENCH_PMU) { + //FIXME: Overflow handling??? + rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start; + rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start; + + /* Calc Instruction Per Cycle (IPC) */ + /* First get quotient */ + rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk, + &pmc_ipc_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + pmc_ipc_tmp_rem = pmc_ipc_remainder; + pmc_ipc_div = rec->pmc_clk / 1000; + if (pmc_ipc_div > 0) { + rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem, + pmc_ipc_div, + &pmc_ipc_remainder); + } + } + + return true; +} + +/* Generic function for invoking a loop function and calculating + * execution time stats. 
The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ *
+ * Returns false when the timed function reports failure (returns 0),
+ * true otherwise.
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+		     int (*func)(struct time_bench_record *record, void *data))
+{
+	struct time_bench_record rec;
+
+	/* Setup record */
+	memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+	rec.version_abi = 1;
+	rec.loops = loops;
+	rec.step = step;
+	rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+	/*** Loop function being timed ***/
+	if (!func(&rec, data)) {
+		pr_err("ABORT: function being timed failed\n");
+		return false;
+	}
+
+	/* loops is unsigned and may exceed INT_MAX, so print it with %u */
+	if (rec.invoked_cnt < loops)
+		pr_warn("WARNING: Invoke count(%llu) smaller than loops(%u)\n",
+			rec.invoked_cnt, loops);
+
+	/* Calculate stats */
+	time_bench_calc_stats(&rec);
+
+	pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+		txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+		rec.ns_per_call_decimal, rec.step, rec.time_sec,
+		rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+		rec.tsc_interval);
+	if (rec.flags & TIME_BENCH_PMU)
+		pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+			txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+			rec.pmc_ipc_decimal);
+	return true;
+}
+
+/* Function getting invoked by kthread: pin to the requested CPU, wait for
+ * the common start event, run the benchmark once, then idle until
+ * kthread_stop() is called.
+ */
+static int invoke_test_on_cpu_func(void *private)
+{
+	struct time_bench_cpu *cpu = private;
+	struct time_bench_sync *sync = cpu->sync;
+	cpumask_t newmask = CPU_MASK_NONE;
+	void *data = cpu->data;
+
+	/* Restrict CPU */
+	cpumask_set_cpu(cpu->rec.cpu, &newmask);
+	set_cpus_allowed_ptr(current, &newmask);
+
+	/* Synchronize start of concurrency test */
+	atomic_inc(&sync->nr_tests_running);
+	wait_for_completion(&sync->start_event);
+
+	/* Start benchmark function */
+	if (!cpu->bench_func(&cpu->rec, data)) {
+		pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+		       cpu->rec.cpu, smp_processor_id());
+	} else {
+		if (verbose)
+			pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+				smp_processor_id());
+	}
+	cpu->did_bench_run = true;
+
+	/* End test */
+	atomic_dec(&sync->nr_tests_running);
+	/*
+	 * Wait for kthread_stop() telling us to stop.  The task state is set
+	 * before every kthread_should_stop() check so that a stop request
+	 * arriving between the check and schedule() is not slept through.
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+/* Compute and print the per-CPU stats for every CPU in @mask, followed by
+ * the average tsc_cycles over those CPUs.
+ */
+void time_bench_print_stats_cpumask(const char *desc,
+				    struct time_bench_cpu *cpu_tasks,
+				    const struct cpumask *mask)
+{
+	uint64_t average = 0;
+	int cpu;
+	int step = 0;
+	struct sum {
+		uint64_t tsc_cycles;
+		int records;
+	} sum = { 0 };
+
+	/* Get stats */
+	for_each_cpu(cpu, mask) {
+		struct time_bench_cpu *c = &cpu_tasks[cpu];
+		struct time_bench_record *rec = &c->rec;
+
+		/* Calculate stats */
+		time_bench_calc_stats(rec);
+
+		pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+			desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
+			rec->ns_per_call_decimal, rec->step, rec->time_sec,
+			rec->time_sec_remainder, rec->time_interval,
+			rec->invoked_cnt, rec->tsc_interval);
+
+		/* Collect average */
+		sum.records++;
+		sum.tsc_cycles += rec->tsc_cycles;
+		step = rec->step;
+	}
+
+	if (sum.records) /* avoid div-by-zero */
+		average = sum.tsc_cycles / sum.records;
+	pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+		average, sum.records, step);
+}
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+			       const struct cpumask *mask, /* Support masking out some CPUs */
+			       struct time_bench_sync *sync,
+			       struct time_bench_cpu *cpu_tasks,
+			       int (*func)(struct time_bench_record *record, void *data))
+{
+	int cpu, running = 0;
+
+	if (verbose) // DEBUG
+		pr_warn("%s() Started on CPU:%d\n", __func__,
+			smp_processor_id());
+
+	/* Reset sync conditions */
+	
atomic_set(&sync->nr_tests_running, 0); + init_completion(&sync->start_event); + + /* Spawn off jobs on all CPUs */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + running++; + c->sync = sync; /* Send sync variable along */ + c->data = data; /* Send opaque along */ + + /* Init benchmark record */ + memset(&c->rec, 0, sizeof(struct time_bench_record)); + c->rec.version_abi = 1; + c->rec.loops = loops; + c->rec.step = step; + c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | + TIME_BENCH_WALLCLOCK); + c->rec.cpu = cpu; + c->bench_func = func; + c->task = kthread_run(invoke_test_on_cpu_func, c, + "time_bench%d", cpu); + if (IS_ERR(c->task)) { + pr_err("%s(): Failed to start test func\n", __func__); + return; /* Argh, what about cleanup?! */ + } + } + + /* Wait until all processes are running */ + while (atomic_read(&sync->nr_tests_running) < running) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + /* Kick off all CPU concurrently on completion event */ + complete_all(&sync->start_event); + + /* Wait for CPUs to finish */ + while (atomic_read(&sync->nr_tests_running)) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + + /* Stop the kthreads */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + kthread_stop(c->task); + } + + if (verbose) // DEBUG - happens often, finish on another CPU + pr_warn("%s() Finished on CPU:%d\n", __func__, + smp_processor_id()); +} diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h new file mode 100644 index 000000000000..e113fcf341dc --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + * for licensing details see kernel-base/COPYING + */ +#ifndef 
_LINUX_TIME_BENCH_H +#define _LINUX_TIME_BENCH_H + +/* Main structure used for recording a benchmark run */ +struct time_bench_record { + uint32_t version_abi; + uint32_t loops; /* Requested loop invocations */ + uint32_t step; /* option for e.g. bulk invocations */ + + uint32_t flags; /* Measurements types enabled */ +#define TIME_BENCH_LOOP BIT(0) +#define TIME_BENCH_TSC BIT(1) +#define TIME_BENCH_WALLCLOCK BIT(2) +#define TIME_BENCH_PMU BIT(3) + + uint32_t cpu; /* Used when embedded in time_bench_cpu */ + + /* Records */ + uint64_t invoked_cnt; /* Returned actual invocations */ + uint64_t tsc_start; + uint64_t tsc_stop; + struct timespec64 ts_start; + struct timespec64 ts_stop; + /* PMU counters for instruction and cycles + * instructions counter including pipelined instructions + */ + uint64_t pmc_inst_start; + uint64_t pmc_inst_stop; + /* CPU unhalted clock counter */ + uint64_t pmc_clk_start; + uint64_t pmc_clk_stop; + + /* Result records */ + uint64_t tsc_interval; + uint64_t time_start, time_stop, time_interval; /* in nanosec */ + uint64_t pmc_inst, pmc_clk; + + /* Derived result records */ + uint64_t tsc_cycles; // +decimal? + uint64_t ns_per_call_quotient, ns_per_call_decimal; + uint64_t time_sec; + uint32_t time_sec_remainder; + uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */ +}; + +/* For synchronizing parallel CPUs to run concurrently */ +struct time_bench_sync { + atomic_t nr_tests_running; + struct completion start_event; +}; + +/* Keep track of CPUs executing our bench function. 
+ * + * Embed a time_bench_record for storing info per cpu + */ +struct time_bench_cpu { + struct time_bench_record rec; + struct time_bench_sync *sync; /* back ptr */ + struct task_struct *task; + /* "data" opaque could have been placed in time_bench_sync, + * but to avoid any false sharing, place it per CPU + */ + void *data; + /* Support masking outsome CPUs, mark if it ran */ + bool did_bench_run; + /* int cpu; // note CPU stored in time_bench_record */ + int (*bench_func)(struct time_bench_record *record, void *data); +}; + +/* + * Below TSC assembler code is not compatible with other archs, and + * can also fail on guests if cpu-flags are not correct. + * + * The way TSC reading is used, many iterations, does not require as + * high accuracy as described below (in Intel Doc #324264). + * + * Considering changing to use get_cycles() (#include <asm/timex.h>). + */ + +/** TSC (Time-Stamp Counter) based ** + * Recommend reading, to understand details of reading TSC accurately: + * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel" + * + * Consider getting exclusive ownership of CPU by using: + * unsigned long flags; + * preempt_disable(); + * raw_local_irq_save(flags); + * _your_code_ + * raw_local_irq_restore(flags); + * preempt_enable(); + * + * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx" + * RDTSC only change "%rax" and "%rdx" but + * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx) + */ +static __always_inline uint64_t tsc_start_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("CPUID\n\t" + "RDTSC\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + //FIXME: on 32bit use clobbered %eax + %edx + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +static __always_inline uint64_t tsc_stop_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("RDTSCP\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + 
"CPUID\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +/** Wall-clock based ** + * + * use: getnstimeofday() + * getnstimeofday(&rec->ts_start); + * getnstimeofday(&rec->ts_stop); + * + * API changed see: Documentation/core-api/timekeeping.rst + * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday + * + * We should instead use: ktime_get_real_ts64() is a direct + * replacement, but consider using monotonic time (ktime_get_ts64()) + * and/or a ktime_t based interface (ktime_get()/ktime_get_real()). + */ + +/** PMU (Performance Monitor Unit) based ** + * + * Needed for calculating: Instructions Per Cycle (IPC) + * - The IPC number tell how efficient the CPU pipelining were + */ +//lookup: perf_event_create_kernel_counter() + +bool time_bench_PMU_config(bool enable); + +/* Raw reading via rdpmc() using fixed counters + * + * From: https://github.com/andikleen/simple-pmu + */ +enum { + FIXED_SELECT = (1U << 30), /* == 0x40000000 */ + FIXED_INST_RETIRED_ANY = 0, + FIXED_CPU_CLK_UNHALTED_CORE = 1, + FIXED_CPU_CLK_UNHALTED_REF = 2, +}; + +static __always_inline unsigned int long long p_rdpmc(unsigned int in) +{ + unsigned int d, a; + + asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory"); + return ((unsigned long long)d << 32) | a; +} + +/* These PMU counter needs to be enabled, but I don't have the + * configure code implemented. 
My current hack is running: + * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko + */ +/* Reading all pipelined instruction */ +static __always_inline unsigned long long pmc_inst(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY); +} + +/* Reading CPU clock cycles */ +static __always_inline unsigned long long pmc_clk(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE); +} + +/* Raw reading via MSR rdmsr() is likely wrong + * FIXME: How can I know which raw MSR registers are conf for what? + */ +#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */ +#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */ +#define MSR_IA32_PCM2 0x400000C3 +static inline uint64_t msr_inst(unsigned long long *msr_result) +{ + return rdmsrq_safe(MSR_IA32_PCM0, msr_result); +} + +/** Generic functions ** + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *rec, void *data)); +bool time_bench_calc_stats(struct time_bench_record *rec); + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)); +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask); + +//FIXME: use rec->flags to select measurement, should be MACRO +static __always_inline void time_bench_start(struct time_bench_record *rec) +{ + //getnstimeofday(&rec->ts_start); + ktime_get_real_ts64(&rec->ts_start); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_start = pmc_inst(); + rec->pmc_clk_start = pmc_clk(); + } + rec->tsc_start = tsc_start_clock(); +} + +static __always_inline void time_bench_stop(struct time_bench_record *rec, + uint64_t invoked_cnt) +{ + rec->tsc_stop = tsc_stop_clock(); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_stop = 
pmc_inst(); + rec->pmc_clk_stop = pmc_clk(); + } + //getnstimeofday(&rec->ts_stop); + ktime_get_real_ts64(&rec->ts_stop); + rec->invoked_cnt = invoked_cnt; +} + +#endif /* _LINUX_TIME_BENCH_H */ diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh new file mode 100755 index 000000000000..7b8b18cfedce --- /dev/null +++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +set -e + +DRIVER="./page_pool/bench_page_pool.ko" +result="" + +function run_test() +{ + rmmod "bench_page_pool.ko" || true + insmod $DRIVER > /dev/null 2>&1 + result=$(dmesg | tail -10) + echo "$result" + + echo + echo "Fast path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "ptr_ring results:" + echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "slow path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" +} + +run_test + +exit 0 diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh new file mode 100755 index 000000000000..726eb5d25839 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_pmtu.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Ensures broadcast route MTU is respected + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="192.168.0.1/24" +CLIENT_BROADCAST_ADDRESS="192.168.0.255" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="192.168.0.2/24" + +setup() { + ip netns add "${CLIENT_NS}" + ip netns add "${SERVER_NS}" + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" link set link0 mtu 9000 + ip -net 
"${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" link set link1 mtu 1500 + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1 + + read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)" + ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}" + ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500 + + ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0 +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + ip netns del "${CLIENT_NS}" + ip netns del "${SERVER_NS}" +} + +trap cleanup EXIT + +setup && + echo "Testing for broadcast route MTU" && + ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1 + +exit $? + diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3cfef5153823..c24417d0047b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -30,16 +30,25 @@ CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NF_CONNTRACK=m CONFIG_IPV6_MROUTE=y CONFIG_IPV6_SIT=y CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y @@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_TARGET_HL=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m diff --git 
a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 00bde7b6f39e..d7bb2e80e88c 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1d_port_8472.sh \ vxlan_bridge_1d.sh \ vxlan_bridge_1q_ipv6.sh \ + vxlan_bridge_1q_mc_ul.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 508f3c700d71..890b3374dacd 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -37,6 +37,7 @@ declare -A NETIFS=( : "${TEAMD:=teamd}" : "${MCD:=smcrouted}" : "${MC_CLI:=smcroutectl}" +: "${MCD_TABLE_NAME:=selftests}" # Constants for netdevice bring-up: # Default time in seconds to wait for an interface to come up before giving up @@ -141,6 +142,20 @@ check_tc_version() fi } +check_tc_erspan_support() +{ + local dev=$1; shift + + tc filter add dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null + if [[ $? 
-ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing erspan support" + return $ksft_skip + fi + tc filter del dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null +} + # Old versions of tc don't understand "mpls_uc" check_tc_mpls_support() { @@ -525,9 +540,9 @@ setup_wait_dev_with_timeout() return 1 } -setup_wait() +setup_wait_n() { - local num_netifs=${1:-$NUM_NETIFS} + local num_netifs=$1; shift local i for ((i = 1; i <= num_netifs; ++i)); do @@ -538,6 +553,11 @@ setup_wait() sleep $WAIT_TIME } +setup_wait() +{ + setup_wait_n "$NUM_NETIFS" +} + wait_for_dev() { local dev=$1; shift @@ -1757,6 +1777,51 @@ mc_send() msend -g $groups -I $if_name -c 1 > /dev/null 2>&1 } +adf_mcd_start() +{ + local ifs=("$@") + + local table_name="$MCD_TABLE_NAME" + local smcroutedir + local pid + local if + local i + + check_command "$MCD" || return 1 + check_command "$MC_CLI" || return 1 + + smcroutedir=$(mktemp -d) + defer rm -rf "$smcroutedir" + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + "$smcroutedir/$table_name.conf" + done + + for if in "${ifs[@]}"; do + if ! 
ip_link_has_flag "$if" MULTICAST; then + ip link set dev "$if" multicast on + defer ip link set dev "$if" multicast off + fi + + echo "phyint $if enable" >> \ + "$smcroutedir/$table_name.conf" + done + + "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \ + -P "$smcroutedir/$table_name.pid" + busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid" + pid=$(cat "$smcroutedir/$table_name.pid") + defer kill_process "$pid" +} + +mc_cli() +{ + local table_name="$MCD_TABLE_NAME" + + "$MC_CLI" -I "$table_name" "$@" +} + start_ip_monitor() { local mtype=$1; shift diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 5a58b1ec8aef..83e52abdbc2e 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -33,10 +33,6 @@ NUM_NETIFS=6 source lib.sh source tc_common.sh -require_command $MCD -require_command $MC_CLI -table_name=selftests - h1_create() { simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 @@ -149,25 +145,6 @@ router_destroy() ip link set dev $rp1 down } -start_mcd() -{ - SMCROUTEDIR="$(mktemp -d)" - - for ((i = 1; i <= $NUM_NETIFS; ++i)); do - echo "phyint ${NETIFS[p$i]} enable" >> \ - $SMCROUTEDIR/$table_name.conf - done - - $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ - -P $SMCROUTEDIR/$table_name.pid -} - -kill_mcd() -{ - pkill $MCD - rm -rf $SMCROUTEDIR -} - setup_prepare() { h1=${NETIFS[p1]} @@ -179,7 +156,7 @@ setup_prepare() rp3=${NETIFS[p5]} h3=${NETIFS[p6]} - start_mcd + adf_mcd_start || exit "$EXIT_STATUS" vrf_prepare @@ -206,7 +183,7 @@ cleanup() vrf_cleanup - kill_mcd + defer_scopes_cleanup } create_mcast_sg() @@ -214,9 +191,9 @@ create_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs + mc_cli add "$if_name" 
"$s_addr" "$mcast" "${dest_ifs[@]}" } delete_mcast_sg() @@ -224,9 +201,9 @@ delete_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs + mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } mcast_v4() diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index b1daad19b01e..b58909a93112 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_ip_tos_test match_indev_test match_ip_ttl_test match_mpls_label_test \ match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \ - match_mpls_lse_test" + match_mpls_lse_test match_erspan_opts_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -676,6 +676,56 @@ match_mpls_lse_test() log_test "mpls lse match ($tcflags)" } +match_erspan_opts_test() +{ + RET=0 + + check_tc_erspan_support $h2 || return 0 + + # h1 erspan setup + tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \ + tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II + tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \ + tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \ + # ERSPAN Type III + ip link set dev erspan1 master v$h1 + ip link set dev erspan2 master v$h1 + # h2 erspan setup + ip link add ep-ex type erspan ttl 64 external # To collect tunnel info + ip link set ep-ex up + ip link set dev ep-ex master v$h2 + tc qdisc add dev ep-ex clsact + + # ERSPAN Type II [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 101 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1001 erspan_opts 1:6789:0:0 \ + action drop + # ERSPAN Type III [decap direction] + tc filter add dev ep-ex ingress protocol 
ip handle 102 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1002 erspan_opts 2:0:1:63 action drop + + ep1mac=$(mac_get erspan1) + $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 101 1 + check_err $? "ERSPAN Type II" + + # Send the Type III probe with erspan2's own MAC, not erspan1's. + ep2mac=$(mac_get erspan2) + $MZ erspan2 -c 1 -p 64 -a $ep2mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 102 1 + check_err $? "ERSPAN Type III" + + # h2 erspan cleanup + tc qdisc del dev ep-ex clsact + tunnel_destroy ep-ex + # h1 erspan cleanup + tunnel_destroy erspan2 # ERSPAN Type III + tunnel_destroy erspan1 # ERSPAN Type II + + log_test "erspan_opts match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh new file mode 100755 index 000000000000..462db0b603e7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -0,0 +1,771 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------------------------+ +# | + $h1.10 + $h1.20 | +# | | 192.0.2.1/28 | 2001:db8:1::1/64 | +# | \________ ________/ | +# | \ / | +# | + $h1 H1 (vrf) | +# +-----------|-----------------------------+ +# | +# +-----------|----------------------------------------------------------------+ +# | +---------|--------------------------------------+ SWITCH (main vrf) | +# | | + $swp1 BR1 (802.1q) | | +# | | vid 10 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) | +# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 | +# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 | +# | | id 1000 id 2000 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +------------------------------------------------+ | +# | | +# | + $swp2 $swp3 + | +# | | 192.0.2.33/28 192.0.2.65/28 | | +# | | 2001:db8:2::1/64 2001:db8:3::1/64 | | +# | | | | +# 
+---|--------------------------------------------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | H2 (vrf) | | H3 (vrf) | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | | +# | | | | | | | | +# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | | | | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + | +# | 192.0.2.34/28 | | 192.0.2.66/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# | | | | +# | +--------------------------------+ | | +--------------------------------+ | +# | | BR1 (802.1q) | | | | BR1 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | | +# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 20 | | | | | vid 10 20 | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | | | | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | 
| | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +--------------------------------+ | | +--------------------------------+ | +# +------------------------------------+ +------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. + +: "${VXPORT:=4789}" +export VXPORT + +: "${GROUP4:=233.252.0.1}" +export GROUP4 + +: "${GROUP6:=ff0e::1:2:3}" +export GROUP6 + +: "${IPMR:=lo10}" + +ALL_TESTS=" + ipv4_nomcroute + ipv4_mcroute + ipv4_mcroute_changelink + ipv4_mcroute_starg + ipv4_mcroute_noroute + ipv4_mcroute_fdb + ipv4_mcroute_fdb_oif0 + ipv4_mcroute_fdb_oif0_sep + + ipv6_nomcroute + ipv6_mcroute + ipv6_mcroute_changelink + ipv6_mcroute_starg + ipv6_mcroute_noroute + ipv6_mcroute_fdb + ipv6_mcroute_fdb_oif0 + + ipv4_nomcroute_rx + ipv4_mcroute_rx + ipv4_mcroute_starg_rx + ipv4_mcroute_fdb_oif0_sep_rx + ipv4_mcroute_fdb_sep_rx + + ipv6_nomcroute_rx + ipv6_mcroute_rx + ipv6_mcroute_starg_rx + ipv6_mcroute_fdb_sep_rx +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init "$h1" + defer simple_if_fini "$h1" + + ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + ip_link_set_up "$h1.10" + ip_addr_add "$h1.10" 192.0.2.1/28 + + ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + ip_link_set_up "$h1.20" + ip_addr_add "$h1.20" 2001:db8:1::1/64 +} + +install_capture() +{ + local dev=$1; shift + + tc qdisc add dev "$dev" clsact + defer tc qdisc del dev "$dev" clsact + + tc filter add dev "$dev" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ip pref 104 + + tc filter add dev "$dev" ingress proto ipv6 pref 106 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ipv6 pref 106 +} + +h2_create() +{ + # $h2 + ip_link_set_up "$h2" + + # H2 + 
vrf_create "v$h2" + defer vrf_destroy "v$h2" + + ip_link_set_up "v$h2" + + # br2 + ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + ip_link_set_master br2 "v$h2" + ip_link_set_up br2 + + # $h2 + ip_link_set_master "$h2" br2 + install_capture "$h2" + + # v1$h2 + ip_link_set_up "v1$h2" + ip_link_set_master "v1$h2" br2 +} + +h3_create() +{ + # $h3 + ip_link_set_up "$h3" + + # H3 + vrf_create "v$h3" + defer vrf_destroy "v$h3" + + ip_link_set_up "v$h3" + + # br3 + ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + ip_link_set_master br3 "v$h3" + ip_link_set_up br3 + + # $h3 + ip_link_set_master "$h3" br3 + install_capture "$h3" + + # v1$h3 + ip_link_set_up "v1$h3" + ip_link_set_master "v1$h3" br3 +} + +switch_create() +{ + local swp1_mac + + # br1 + swp1_mac=$(mac_get "$swp1") + ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip_link_set_addr br1 "$swp1_mac" + ip_link_set_up br1 + + # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on + # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. + ip_link_add "X$IPMR" up type dummy + + # IPMR + ip_link_add "$IPMR" up type dummy + ip_addr_add "$IPMR" 192.0.2.100/28 + ip_addr_add "$IPMR" 2001:db8:4::1/64 + + # $swp1 + ip_link_set_up "$swp1" + ip_link_set_master "$swp1" br1 + bridge_vlan_add vid 10 dev "$swp1" + bridge_vlan_add vid 20 dev "$swp1" + + # $swp2 + ip_link_set_up "$swp2" + ip_addr_add "$swp2" 192.0.2.33/28 + ip_addr_add "$swp2" 2001:db8:2::1/64 + + # $swp3 + ip_link_set_up "$swp3" + ip_addr_add "$swp3" 192.0.2.65/28 + ip_addr_add "$swp3" 2001:db8:3::1/64 +} + +vx_create() +{ + local name=$1; shift + local vid=$1; shift + + ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 16 \ + "$@" + ip_link_set_master "$name" br1 + bridge_vlan_add vid "$vid" dev "$name" pvid untagged +} +export -f vx_create + +vx_wait() +{ + # Wait for all the ARP, IGMP etc. 
noise to settle down so that the + # tunnel is clear for measurements. + sleep 10 +} + +vx10_create() +{ + vx_create vx10 10 id 1000 "$@" +} +export -f vx10_create + +vx20_create() +{ + vx_create vx20 20 id 2000 "$@" +} +export -f vx20_create + +vx10_create_wait() +{ + vx10_create "$@" + vx_wait +} + +vx20_create_wait() +{ + vx20_create "$@" + vx_wait +} + +ns_init_common() +{ + local ns=$1; shift + local if_in=$1; shift + local ipv4_in=$1; shift + local ipv6_in=$1; shift + local ipv4_host=$1; shift + local ipv6_host=$1; shift + + # v2$h2 / v2$h3 + ip_link_set_up "$if_in" + ip_addr_add "$if_in" "$ipv4_in" + ip_addr_add "$if_in" "$ipv6_in" + + # br1 + ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip_link_set_up br1 + + # vx10, vx20 + vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" + vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" + + # w1 + ip_link_add w1 type veth peer name w2 + ip_link_set_master w1 br1 + ip_link_set_up w1 + bridge_vlan_add vid 10 dev w1 + bridge_vlan_add vid 20 dev w1 + + # w2 + simple_if_init w2 + defer simple_if_fini w2 + + # w2.10 + ip_link_add w2.10 master vw2 link w2 type vlan id 10 + ip_link_set_up w2.10 + ip_addr_add w2.10 "$ipv4_host" + + # w2.20 + ip_link_add w2.20 master vw2 link w2 type vlan id 20 + ip_link_set_up w2.20 + ip_addr_add w2.20 "$ipv6_host" +} +export -f ns_init_common + +ns2_create() +{ + # NS2 + ip netns add ns2 + defer ip netns del ns2 + + # v2$h2 + ip link set dev "v2$h2" netns ns2 + defer ip -n ns2 link set dev "v2$h2" netns 1 + + in_ns ns2 \ + ns_init_common ns2 "v2$h2" \ + 192.0.2.34/28 2001:db8:2::2/64 \ + 192.0.2.3/28 2001:db8:1::3/64 +} + +ns3_create() +{ + # NS3 + ip netns add ns3 + defer ip netns del ns3 + + # v2$h3 + ip link set dev "v2$h3" netns ns3 + defer ip -n ns3 link set dev "v2$h3" netns 1 + + ip -n ns3 link set dev "v2$h3" up + + in_ns ns3 \ + ns_init_common ns3 "v2$h3" \ + 192.0.2.66/28 2001:db8:3::2/64 \ + 192.0.2.4/28 
2001:db8:1::4/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + ip_link_add "v1$h2" type veth peer name "v2$h2" + ip_link_add "v1$h3" type veth peer name "v2$h3" + + h1_create + h2_create + h3_create + switch_create + ns2_create + ns3_create +} + +adf_install_broken_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + + mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" +} + +# Install any-source routes for $GROUP4 / $GROUP6 with input interface $swp2 +# or $swp3 and output interface $IPMR. Every deferred removal names the same +# output interface as its add, so cleanup still matches when IPMR is +# overridden from its lo10 default. +adf_install_rx() +{ + mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" "$IPMR" + + mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" "$IPMR" + + mc_cli add "$swp2" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp2" :: "$GROUP6" "$IPMR" + + mc_cli add "$swp3" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp3" :: "$GROUP6" "$IPMR" +} + +# Start the multicast daemon, install (S,G) routes from $IPMR towards $swp2 +# and $swp3, then the RX routes. Each deferred removal repeats exactly the +# source address used by the matching add. +adf_install_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_sg_sep() +{ + adf_mcd_start lo || exit "$EXIT_STATUS" + + mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" +} + +adf_install_sg_sep_rx() +{ + local lo=$1; shift + + adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS" + + mc_cli add "$lo" 192.0.2.120 
"$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_starg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +do_packets_v4() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q +} + +do_packets_v6() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q +} + +do_test() +{ + local ipv=$1; shift + local expect_h2=$1; shift + local expect_h3=$1; shift + local what=$1; shift + + local pref=$((100 + ipv)) + local t0_h2 + local t0_h3 + local t1_h2 + local t1_h3 + local d_h2 + local d_h3 + + RET=0 + + t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + "do_packets_v$ipv" + sleep 1 + + t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + d_h2=$((t1_h2 - t0_h2)) + d_h3=$((t1_h3 - t0_h3)) + + ((d_h2 == expect_h2)) + check_err $? "Expected $expect_h2 packets on H2, got $d_h2" + + ((d_h3 == expect_h3)) + check_err $? "Expected $expect_h3 packets on H3, got $d_h3" + + log_test "VXLAN MC flood $what" +} + +ipv4_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping_do "$h1.10" 192.0.2.3 + check_err $? "H2 should respond" + + ping_do "$h1.10" 192.0.2.4 + check_err_fail "$h3_should_fail" $? 
"H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv6_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping6_do "$h1.20" 2001:db8:1::3 + check_err $? "H2 should respond" + + ping6_do "$h1.20" 2001:db8:1::4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv4_nomcroute() +{ + # Install a misleading (S,G) rule to attempt to trick the system into + # pushing the packets elsewhere. + adf_install_broken_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2" + do_test 4 10 0 "IPv4 nomcroute" +} + +ipv6_nomcroute() +{ + # Like for IPv4, install a misleading (S,G). + adf_install_broken_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + do_test 6 10 0 "IPv6 nomcroute" +} + +ipv4_nomcroute_rx() +{ + vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2" + ipv4_do_test_rx 1 "IPv4 nomcroute ping" +} + +ipv6_nomcroute_rx() +{ + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + ipv6_do_test_rx 1 "IPv6 nomcroute ping" +} + +ipv4_mcroute() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute" +} + +ipv6_mcroute() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute" +} + +ipv4_mcroute_rx() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute ping" +} + +ipv6_mcroute_rx() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute ping" +} + +ipv4_mcroute_changelink() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" + ip link set dev vx10 type vxlan mcroute + sleep 1 + do_test 4 10 10 "IPv4 mcroute changelink" +} + +ipv6_mcroute_changelink() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" 
mcroute + ip link set dev vx20 type vxlan mcroute + sleep 1 + do_test 6 10 10 "IPv6 mcroute changelink" +} + +ipv4_mcroute_starg() +{ + adf_install_starg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute (*,G)" +} + +ipv6_mcroute_starg() +{ + adf_install_starg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute (*,G)" +} + +ipv4_mcroute_starg_rx() +{ + adf_install_starg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping" +} + +ipv6_mcroute_starg_rx() +{ + adf_install_starg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping" +} + +ipv4_mcroute_noroute() +{ + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 0 0 "IPv4 mcroute, no route" +} + +ipv6_mcroute_noroute() +{ + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 0 0 "IPv6 mcroute, no route" +} + +ipv4_mcroute_fdb() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute + bridge fdb add dev vx10 \ + 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR" + do_test 4 10 10 "IPv4 mcroute FDB" +} + +ipv6_mcroute_fdb() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute + bridge -6 fdb add dev vx20 \ + 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR" + do_test 6 10 10 "IPv6 mcroute FDB" +} + +# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup. +ipv4_mcroute_fdb_oif0() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute oif=0" +} + +ipv6_mcroute_fdb_oif0() +{ + # The IPv6 tunnel lookup does not fall back to selection by source + # address. 
Instead it just does a FIB match, and that would find one of + # the several ff00::/8 multicast routes -- each device has one. In order + # to reliably force the $IPMR device, add a /128 route for the + # destination group address. + ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR" + defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR" + + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6" + do_test 6 10 10 "IPv6 mcroute oif=0" +} + +# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR. +# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT. +ipv4_mcroute_fdb_oif0_sep() +{ + adf_install_sg_sep + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0" +} + +ipv4_mcroute_fdb_oif0_sep_rx() +{ + adf_install_sg_sep_rx lo + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping" +} + +ipv4_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx lo + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add \ + dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping" +} + +ipv6_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx "X$IPMR" + + ip_addr_add "X$IPMR" 2001:db8:5::1/64 + vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del 
dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ + self static dst "$GROUP6" via "X$IPMR" + ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh new file mode 100755 index 000000000000..bf0243366caa --- /dev/null +++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test IPv6 force_forwarding interface property +# +# This test verifies that the force_forwarding property works correctly: +# - When global forwarding is disabled, packets are not forwarded normally +# - When force_forwarding is enabled on an interface, packets are forwarded +# regardless of the global forwarding setting + +source lib.sh + +cleanup() { + cleanup_ns $ns1 $ns2 $ns3 +} + +trap cleanup EXIT + +setup_test() { + # Create three namespaces: sender, router, receiver + setup_ns ns1 ns2 ns3 + + # Create veth pairs: ns1 <-> ns2 <-> ns3 + ip link add name veth12 type veth peer name veth21 + ip link add name veth23 type veth peer name veth32 + + # Move interfaces to namespaces + ip link set veth12 netns $ns1 + ip link set veth21 netns $ns2 + ip link set veth23 netns $ns2 + ip link set veth32 netns $ns3 + + # Configure interfaces + ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad + ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad + ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad + ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad + + # Bring up interfaces + ip -n $ns1 link set veth12 up + ip -n $ns2 link set veth21 up + ip -n $ns2 link set veth23 up + ip -n $ns3 link set veth32 up + + # Add routes + ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2 + ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1 + + # Disable global forwarding + ip netns exec $ns2 sysctl -qw 
net.ipv6.conf.all.forwarding=0 +} + +test_force_forwarding() { + local ret=0 + + echo "TEST: force_forwarding functionality" + + # Check if force_forwarding sysctl exists + if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then + echo "SKIP: force_forwarding not available" + return $ksft_skip + fi + + # Test 1: Without force_forwarding, ping should fail + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "FAIL: ping succeeded when forwarding disabled" + ret=1 + else + echo "PASS: forwarding disabled correctly" + fi + + # Test 2: With force_forwarding enabled, ping should succeed + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "PASS: force_forwarding enabled forwarding" + else + echo "FAIL: ping failed with force_forwarding enabled" + ret=1 + fi + + return $ret +} + +echo "IPv6 force_forwarding test" +echo "==========================" + +setup_test +test_force_forwarding +ret=$? 
+ +if [ $ret -eq 0 ]; then + echo "OK" + exit 0 +elif [ $ret -eq $ksft_skip ]; then + echo "SKIP" + exit $ksft_skip +else + echo "FAIL" + exit 1 +fi diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 86a216e9aca8..c7add0dc4c60 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -240,6 +240,29 @@ create_netdevsim() { echo nsim$id } +create_netdevsim_port() { + local nsim_id="$1" + local ns="$2" + local port_id="$3" + local perm_addr="$4" + local orig_dev + local new_dev + local nsim_path + + nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id" + + echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1 + + orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1) + orig_dev=$(basename "$orig_dev") + new_dev="nsim${nsim_id}p$port_id" + + ip -netns "$ns" link set dev "$orig_dev" name "$new_dev" + ip -netns "$ns" link set dev "$new_dev" up + + echo "$new_dev" +} + # Remove netdevsim with given id. cleanup_netdevsim() { local id="$1" @@ -547,13 +570,19 @@ ip_link_set_addr() defer ip link set dev "$name" address "$old_addr" } -ip_link_is_up() +ip_link_has_flag() { local name=$1; shift + local flag=$1; shift local state=$(ip -j link show "$name" | - jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') - [[ $state == "UP" ]] + jq --arg flag "$flag" 'any(.[].flags.[]; . 
== $flag)') + [[ $state == true ]] +} + +ip_link_is_up() +{ + ip_link_has_flag "$1" UP } ip_link_set_up() diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 8697bd27dc30..02be28dcc089 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily -from .ynl import NetshaperFamily +from .ynl import NetshaperFamily, DevlinkFamily diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 61287c203b6e..8e35ed12ed9e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -32,6 +32,7 @@ class KsftTerminate(KeyboardInterrupt): def ksft_pr(*objs, **kwargs): + kwargs["flush"] = True print("#", *objs, **kwargs) @@ -139,7 +140,7 @@ def ktap_result(ok, cnt=1, case="", comment=""): res += "." + str(case.__name__) if comment: res += " # " + comment - print(res) + print(res, flush=True) def ksft_flush_defer(): @@ -227,8 +228,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} - print("TAP version 13") - print("1.." + str(len(cases))) + print("TAP version 13", flush=True) + print("1.." 
+ str(len(cases)), flush=True) global KSFT_RESULT cnt = 0 diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 34470d65d871..f395c90fb0f1 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -175,6 +175,10 @@ def tool(name, args, json=None, ns=None, host=None): return cmd_obj +def bpftool(args, json=None, ns=None, host=None): + return tool('bpftool', args, json=json, ns=ns, host=host) + + def ip(args, json=None, ns=None, host=None): if ns: args = f'-netns {ns} ' + args @@ -185,6 +189,41 @@ def ethtool(args, json=None, ns=None, host=None): return tool('ethtool', args, json=json, ns=ns, host=host) +def bpftrace(expr, json=None, ns=None, host=None, timeout=None): + """ + Run bpftrace and return map data (if json=True). + The output of bpftrace is inconvenient, so the helper converts + to a dict indexed by map name, e.g.: + { + "@": { ... }, + "@map2": { ... }, + } + """ + cmd_arr = ['bpftrace'] + # Throw in --quiet if json, otherwise the output has two objects + if json: + cmd_arr += ['-f', 'json', '-q'] + if timeout: + expr += ' interval:s:' + str(timeout) + ' { exit(); }' + cmd_arr += ['-e', expr] + cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False) + if json: + # bpftrace prints objects as lines + ret = {} + for l in cmd_obj.stdout.split('\n'): + if not l.strip(): + continue + one = _json.loads(l) + if one.get('type') != 'map': + continue + for k, v in one["data"].items(): + if k.startswith('@'): + k = k.lstrip('@') + ret[k] = v + return ret + return cmd_obj + + def rand_port(type=socket.SOCK_STREAM): """ Get a random unprivileged port. 
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 6329ae805abf..2b3a61ea3bfa 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -56,3 +56,8 @@ class NetshaperFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), schema='', recv_size=recv_size) + +class DevlinkFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c new file mode 100644 index 000000000000..521ba38f2ddd --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define MAX_ADJST_OFFSET 256 +#define MAX_PAYLOAD_LEN 5000 +#define MAX_HDR_LEN 64 + +enum { + XDP_MODE = 0, + XDP_PORT = 1, + XDP_ADJST_OFFSET = 2, + XDP_ADJST_TAG = 3, +} xdp_map_setup_keys; + +enum { + XDP_MODE_PASS = 0, + XDP_MODE_DROP = 1, + XDP_MODE_TX = 2, + XDP_MODE_TAIL_ADJST = 3, + XDP_MODE_HEAD_ADJST = 4, +} xdp_map_modes; + +enum { + STATS_RX = 0, + STATS_PASS = 1, + STATS_DROP = 2, + STATS_TX = 3, + STATS_ABORT = 4, +} xdp_stats; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __s32); +} map_xdp_setup SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __u64); +} map_xdp_stats SEC(".maps"); + +static __u32 min(__u32 a, __u32 b) +{ + return a < b ? 
a : b; +} + +static void record_stats(struct xdp_md *ctx, __u32 stat_type) +{ + __u64 *count; + + count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type); + + if (count) + __sync_fetch_and_add(count, 1); +} + +static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return NULL; + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + } else { + return NULL; + } + + if (udph + 1 > (struct udphdr *)data_end) + return NULL; + + if (udph->dest != bpf_htons(port)) + return NULL; + + record_stats(ctx, STATS_RX); + + return udph; +} + +static int xdp_mode_pass(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_PASS); + + return XDP_PASS; +} + +static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_DROP); + + return XDP_DROP; +} + +static void swap_machdr(void *data) +{ + struct ethhdr *eth = data; + __u8 tmp_mac[ETH_ALEN]; + + __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN); + __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN); + __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN); +} + +static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void 
*)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + + if (data + sizeof(*eth) > data_end) + return XDP_PASS; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __be32 tmp_ip = iph->saddr; + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*iph) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + swap_machdr((void *)eth); + + iph->saddr = iph->daddr; + iph->daddr = tmp_ip; + + record_stats(ctx, STATS_TX); + + return XDP_TX; + + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + struct in6_addr tmp_ipv6; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*ipv6h) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + swap_machdr((void *)eth); + + __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr, + sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6)); + + record_stats(ctx, STATS_TX); + + return XDP_TX; + } + + return XDP_PASS; +} + +static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + __u32 len, len_new; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __u16 total_len; + + if (iph + 1 > (struct iphdr *)data_end) + return NULL; + + iph->tot_len = 
bpf_htons(bpf_ntohs(iph->tot_len) + offset); + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + __u16 payload_len; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + *udp_csum = ~((__u32)udph->check); + + len = ipv6h->payload_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + ipv6h->payload_len = len_new; + + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + + len = udph->len; + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + } else { + return NULL; + } + + udph->len = len_new; + + return udph; +} + +static __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; +} + +static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, + __u32 hdr_len) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + struct udphdr *udph = NULL; + __u32 buff_len; + + udph = update_pkt(ctx, 0 - offset, &udp_csum); + if (!udph) + return -1; + + buff_len = bpf_xdp_get_buff_len(ctx); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + /* Make sure we have enough data to avoid eating the header */ + if (buff_len - offset < hdr_len) + return -1; + + buff_pos = buff_len - offset; + if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + __u32 buff_len, hdr_len, key; + struct udphdr *udph; + __s32 *val; + __u8 tag; + + /* Proceed to update the packet headers before attempting to adjust + * the tail. Once the tail is adjusted we lose access to the offset + * amount of data at the end of the packet which is crucial to update + * the checksum. + * Since any failure beyond this would abort the packet, we should + * not worry about passing a packet up the stack with wrong headers + */ + udph = update_pkt(ctx, offset, &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&tmp_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + buff_len = bpf_xdp_get_buff_len(ctx); + + if (bpf_xdp_adjust_tail(ctx, offset) < 0) { + bpf_printk("Failed to adjust tail\n"); + return -1; + } + + if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) +{ + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + __s32 *adjust_offset, *val; + __u32 key, hdr_len; + void *offset_ptr; + __u8 tag; + int ret; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + hdr_len = (void *)udph - data + sizeof(struct udphdr); + key = XDP_ADJST_OFFSET; + adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!adjust_offset) + return XDP_PASS; + + if (*adjust_offset < 0) + ret = xdp_adjst_tail_shrnk_data(ctx, + (__u16)(0 - *adjust_offset), + hdr_len); + else + ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset)); + if (ret) + goto abort_pkt; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort_pkt: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + struct udphdr *udph; + void *offset_ptr; + __u32 udp_csum = 0; + + /* Update the length information in the IP and UDP headers before + * adjusting the headroom. This simplifies accessing the relevant + * fields in the IP and UDP headers for fragmented packets. Any + * failure beyond this point will result in the packet being aborted, + * so we don't need to worry about incorrect length information for + * passed packets. + */ + udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum); + if (!udph) + return -1; + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) + return -1; + + if (bpf_xdp_adjust_head(ctx, offset) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char hdr_buff[MAX_HDR_LEN]; + char data_buff[MAX_ADJST_OFFSET]; + void *offset_ptr; + __s32 *val; + __u32 key; + __u8 tag; + __u32 udp_csum = 0; + struct udphdr *udph; + + udph = update_pkt(ctx, (__s16)(offset), &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&data_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph_ptr = NULL; + __u32 key, size, hdr_len; + __s32 *val; + int res; + + /* Filter packets based on UDP port */ + udph_ptr = filter_udphdr(ctx, port); + if (!udph_ptr) + return XDP_PASS; + + hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr); + + key = XDP_ADJST_OFFSET; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return XDP_PASS; + + switch (*val) { + case -16: + case 16: + size = 16; + break; + case -32: + case 32: + size = 32; + break; + case -64: + case 64: + size = 64; + break; + case -128: + case 128: + size = 128; + break; + case -256: + case 256: + size = 256; + break; + default: + bpf_printk("Invalid adjustment offset: %d\n", *val); + goto abort; + } + + if (*val < 0) + res = xdp_adjst_head_grow_data(ctx, hdr_len, size); + else + res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size); + + if (res) + goto abort; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_prog_common(struct xdp_md *ctx) +{ + __u32 key, *port; + __s32 *mode; + + key = XDP_MODE; + 
mode = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!mode) + return XDP_PASS; + + key = XDP_PORT; + port = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!port) + return XDP_PASS; + + switch (*mode) { + case XDP_MODE_PASS: + return xdp_mode_pass(ctx, (__u16)(*port)); + case XDP_MODE_DROP: + return xdp_mode_drop_handler(ctx, (__u16)(*port)); + case XDP_MODE_TX: + return xdp_mode_tx_handler(ctx, (__u16)(*port)); + case XDP_MODE_TAIL_ADJST: + return xdp_adjst_tail(ctx, (__u16)(*port)); + case XDP_MODE_HEAD_ADJST: + return xdp_head_adjst(ctx, (__u16)(*port)); + } + + /* Default action is to simply pass */ + return XDP_PASS; +} + +SEC("xdp") +int xdp_prog(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +SEC("xdp.frags") +int xdp_prog_frags(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 4f80014cae49..968d440c03fe 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -13,6 +13,7 @@ CONFIG_NETFILTER_NETLINK=m CONFIG_NF_TABLES=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_BPF=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_STATISTIC=m @@ -25,6 +26,7 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 7ea5fb28c93d..1d5d3c4e7e87 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -77,6 +77,7 @@ static int cfg_cork; static bool cfg_cork_mixed; static int cfg_cpu = -1; /* default: pin to last cpu */ +static int cfg_expect_zerocopy = -1; static int cfg_family = 
PF_UNSPEC; static int cfg_ifindex = 1; static int cfg_payload_len; @@ -92,9 +93,9 @@ static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; static struct sockaddr_storage cfg_src_addr; +static int exitcode; static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; -static int zerocopied = -1; static uint32_t next_completion; static uint32_t sends_since_notify; @@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain) next_completion = hi + 1; zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED); - if (zerocopied == -1) - zerocopied = zerocopy; - else if (zerocopied != zerocopy) { - fprintf(stderr, "serr: inconsistent\n"); - zerocopied = zerocopy; + if (cfg_expect_zerocopy != -1 && + cfg_expect_zerocopy != zerocopy) { + fprintf(stderr, "serr: ee_code: %u != expected %u\n", + zerocopy, cfg_expect_zerocopy); + exitcode = 1; + /* suppress repeated messages */ + cfg_expect_zerocopy = zerocopy; } if (cfg_verbose >= 2) @@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol) fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n", packets, bytes >> 20, completions, - zerocopied == 1 ? 'y' : 'n'); + cfg_zerocopy && cfg_expect_zerocopy == 1 ? 
'y' : 'n'); } static int do_setup_rx(int domain, int type, int protocol) @@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_zerocopy = true; break; + case 'Z': + cfg_expect_zerocopy = !!atoi(optarg); + break; } } @@ -817,5 +823,5 @@ int main(int argc, char **argv) else error(1, 0, "unknown cfg_test %s", cfg_test); - return 0; + return exitcode; } diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 89c22f5320e0..28178a38a4e7 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -6,6 +6,7 @@ set -e readonly DEV="veth0" +readonly DUMMY_DEV="dummy0" readonly DEV_MTU=65535 readonly BIN="./msg_zerocopy" @@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}" readonly NS1="${NSPREFIX}1" readonly NS2="${NSPREFIX}2" -readonly SADDR4='192.168.1.1' -readonly DADDR4='192.168.1.2' -readonly SADDR6='fd::1' -readonly DADDR6='fd::2' +readonly LPREFIX4='192.168.1' +readonly RPREFIX4='192.168.2' +readonly LPREFIX6='fd' +readonly RPREFIX6='fc' + readonly path_sysctl_mem="net.core.optmem_max" # No arguments: automated test if [[ "$#" -eq "0" ]]; then - $0 4 tcp -t 1 - $0 6 tcp -t 1 - $0 4 udp -t 1 - $0 6 udp -t 1 - echo "OK. All tests passed" - exit 0 + ret=0 + + $0 4 tcp -t 1 || ret=1 + $0 6 tcp -t 1 || ret=1 + $0 4 udp -t 1 || ret=1 + $0 6 udp -t 1 || ret=1 + + [[ "$ret" == "0" ]] && echo "OK. 
All tests passed" + exit $ret fi # Argument parsing @@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@" # Argument parsing: configure addresses if [[ "${IP}" == "4" ]]; then - readonly SADDR="${SADDR4}" - readonly DADDR="${DADDR4}" + readonly SADDR="${LPREFIX4}.1" + readonly DADDR="${LPREFIX4}.2" + readonly DUMMY_ADDR="${RPREFIX4}.1" + readonly DADDR_TXONLY="${RPREFIX4}.2" + readonly MASK="24" elif [[ "${IP}" == "6" ]]; then - readonly SADDR="${SADDR6}" - readonly DADDR="${DADDR6}" + readonly SADDR="${LPREFIX6}::1" + readonly DADDR="${LPREFIX6}::2" + readonly DUMMY_ADDR="${RPREFIX6}::1" + readonly DADDR_TXONLY="${RPREFIX6}::2" + readonly MASK="64" + readonly NODAD="nodad" else echo "Invalid IP version ${IP}" exit 1 @@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" +ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy + # Bring the devices up ip -netns "${NS1}" link set "${DEV}" up ip -netns "${NS2}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DUMMY_DEV}" up # Set fixed MAC addresses on the devices ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 # Add fixed IP addresses to the devices -ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" -ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" -ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad -ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad +ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD} + +ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}" +ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}" + +ip netns exec "${NS2}" sysctl -wq 
net.ipv4.ip_forward=1 +ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1 # Optionally disable sg or csum offload to test edge cases # ip netns exec "${NS1}" ethtool -K "${DEV}" sg off +ret=0 + do_test() { local readonly ARGS="$1" - echo "ipv${IP} ${TXMODE} ${ARGS}" - ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & + # tx-rx test + # packets queued to a local socket are copied, + # sender notification has SO_EE_CODE_ZEROCOPY_COPIED. + + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n" + ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & sleep 0.2 - ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1 wait + + # next test is unconnected tx to dummy0, cannot exercise with tcp + [[ "${TXMODE}" == "tcp" ]] && return + + # tx-only test: send out dummy0 + # packets leaving the host are not copied, + # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED. 
+ + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1 } do_test "${EXTRA_ARGS}" do_test "-z ${EXTRA_ARGS}" -echo ok + +[[ "$ret" == "0" ]] && echo "OK" diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh new file mode 100755 index 000000000000..18509da293e5 --- /dev/null +++ b/tools/testing/selftests/net/netdev-l2addr.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +set -o pipefail + +NSIM_ADDR=2025 +TEST_ADDR="d0:be:d0:be:d0:00" + +RET_CODE=0 + +cleanup() { + cleanup_netdevsim "$NSIM_ADDR" + cleanup_ns "$NS" +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + RET_CODE=1 +} + +get_addr() +{ + local type="$1" + local dev="$2" + local ns="$3" + + ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type" +} + +setup_ns NS + +nsim=$(create_netdevsim $NSIM_ADDR "$NS") + +get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr" +get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it" + +ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR" +ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR" + +[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr" +[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr" + +if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then + fail "Created netdevsim with broadcast permaddr" +fi + +nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR") + +get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr" +[[ "$(get_addr permaddr 
"$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr" + +cleanup_netdevsim "$NSIM_ADDR" "$NS" + +exit $RET_CODE diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 363646f4fefe..79d5b33966ba 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -1,6 +1,8 @@ CONFIG_AUDIT=y CONFIG_BPF_SYSCALL=y CONFIG_BRIDGE=m +CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_IP=m CONFIG_BRIDGE_EBT_REDIRECT=m @@ -14,7 +16,10 @@ CONFIG_INET_ESP=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_NAT=m CONFIG_IP_NF_FILTER=m CONFIG_IP6_NF_FILTER=m CONFIG_IP_NF_RAW=m @@ -92,4 +97,4 @@ CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m CONFIG_TUN=m CONFIG_INET_DIAG=m -CONFIG_SCTP_DIAG=m +CONFIG_INET_SCTP_DIAG=m diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh index 6af2ea3ad6b8..9c9d5b38ab71 100755 --- a/tools/testing/selftests/net/netfilter/ipvs.sh +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -151,7 +151,7 @@ test_nat() { test_tun() { ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 - ip netns exec "${ns1}" modprobe -q ipip + modprobe -q ipip ip netns exec "${ns1}" ip link set tunl0 up ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0 ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0 @@ -160,10 +160,10 @@ test_tun() { ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port} ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1 - ip netns exec "${ns2}" modprobe -q ipip ip netns exec "${ns2}" ip link set tunl0 up ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw 
net.ipv4.conf.all.arp_announce=2 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh index 5ff7be9daeee..c0fffaa6dbd9 100755 --- a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh +++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh @@ -10,6 +10,8 @@ source lib.sh checktool "nft --version" "run test without nft tool" checktool "iperf3 --version" "run test without iperf3 tool" +read kernel_tainted < /proc/sys/kernel/tainted + # how many seconds to torture the kernel? # default to 80% of max run time but don't exceed 48s TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10)) @@ -135,7 +137,8 @@ else wait fi -[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || { + +[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && { echo "FAIL: Kernel is tainted!" 
exit $ksft_fail } diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index cd8a58097448..1f5227f3d64d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len) name[0] = '\0'; rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); if (rc < 0) - log_err_errno("setsockopt(SO_BINDTODEVICE)"); + log_err_errno("getsockopt(SO_BINDTODEVICE)"); return rc; } @@ -535,7 +535,7 @@ static int set_freebind(int sd, int version) break; case AF_INET6: if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) { - log_err_errno("setsockopt(IPV6_FREEBIND"); + log_err_errno("setsockopt(IPV6_FREEBIND)"); rc = -1; } break; @@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str, sep++; if (str_to_uint(sep, 1, pfx_len_max, &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); + fprintf(stderr, "Invalid prefix length\n"); return 1; } } else { @@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen, } } - nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; + nfds = interactive ? 
MAX(fileno(stdin), sd) + 1 : sd + 1; while (1) { FD_ZERO(&rfds); FD_SET(sd, &rfds); @@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args) sd = socket(args->version, args->type, args->protocol); if (sd < 0) { log_err_errno("Error opening socket"); - return -1; + return -1; } if (set_reuseaddr(sd) != 0) @@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) * waiting to be told when to continue */ if (read(fd, &buf, sizeof(buf)) <= 0) { - log_err_errno("Failed to read IPC status from status"); + log_err_errno("Failed to read IPC status from pipe"); return 1; } if (!buf) { diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index beaee5e4e2aa..5c66421ab8aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -2,8 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 import time +from os import system from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge, ksft_busy_wait +from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait from lib.py import NetdevFamily, NetdevSimDev, ip @@ -34,6 +35,128 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def napi_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at napi and device level. 
+ """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': "enabled"}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # check it is not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # verify if napi threaded is still set + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # check it is still not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + # unset napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': "disabled"}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + + # set threaded at device level + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "enabled") + ksft_ne(napi1.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + # set napi threaded for 
napi0 + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + +def dev_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at device level using sysfs. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set threaded + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "enabled") + ksft_ne(napi1.get('pid'), None) + + # unset threaded + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: """ @@ -122,7 +245,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - nsim_rxq_reset_down], + dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() diff --git 
a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index ef8b25a606d8..c5b01e1bd4c7 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then # xfail tests that are known flaky with dbg config, not fixable. # still run them for coverage (and expect 100% pass without dbg). declare -ar xfail_list=( + "tcp_blocking_blocking-connect.pkt" + "tcp_blocking_blocking-read.pkt" "tcp_eor_no-coalesce-retrans.pkt" "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_sack_sack-route-refresh-ip-tos.pkt" "tcp_slow_start_slow-start-after-win-update.pkt" "tcp_timestamping.*.pkt" "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_cl.*.pkt" "tcp_zerocopy_epoll_.*.pkt" "tcp_tcp_info_tcp-info-.*-limited.pkt" ) diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt index 914eabab367a..657e42ca65b5 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test for blocking read. + --tolerance_usecs=10000 +--mss=1000 `./defaults.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt new file mode 100644 index 000000000000..c790d0af635e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test various DSACK (RFC 2883) behaviors. + +--mss=1000 + +`./defaults.sh` + + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) 
= 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + +// First SACK range. + +0 < P. 1001:2001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001> + +// Check SACK coalescing (contiguous sequence). + +0 < P. 2001:3001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001> + +// Check we have two SACK ranges for non contiguous sequences. + +0 < P. 4001:5001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001> + +// Three ranges. + +0 < P. 7001:8001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001> + +// DSACK (1001:3001) + SACK (6001:7001) + +0 < P. 1:6001(6000) ack 1 win 1024 + +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001> + +// DSACK (7001:8001) + +0 < P. 6001:8001(2000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001> + +// DSACK for an older segment. + +0 < P. 1:1001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001> diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt index df49c67645ac..e13f0eee9795 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test TCP_INQ and TCP_CM_INQ on the client side. + +--mss=1000 + `./defaults.sh ` diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt index 04a5e2590c62..14dd5f813d50 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test TCP_INQ and TCP_CM_INQ on the server side. 
+ +--mss=1000 + `./defaults.sh ` diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt new file mode 100644 index 000000000000..7e6bc5fb0c8d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:11001(9000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001> + +// check that ooo packet properly updates tcpi_rcv_mss + +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }% + + +0 < . 11001:21001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001> + diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt new file mode 100644 index 000000000000..3848b419e68c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:4001(4000) ack 1 win 257 + +0 > . 
1:1(0) ack 4001 win 5000 + +// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +1 < P. 5001:55001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 70001:80001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + + +0 read(4, ..., 100000) = 4000 + +// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 54001 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. ++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt new file mode 100644 index 000000000000..f575c0ff89da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:20001(20000) ack 1 win 257 + +.04 > . 1:1(0) ack 20001 win 18000 + + +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0 + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 20001 win 18000 + + +0 read(4, ..., 20000) = 20000 +// A too big packet is accepted if the receive queue is empty + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 
1:1(0) ack 80001 win 0 + diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 2e8243a65b50..d6c00efeb664 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -21,6 +21,7 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec + kci_test_macsec_vlan kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get @@ -30,6 +31,7 @@ ALL_TESTS=" kci_test_address_proto kci_test_enslave_bonding kci_test_mngtmpaddr + kci_test_operstate " devdummy="test-dummy0" @@ -291,6 +293,17 @@ kci_test_route_get() end_test "PASS: route get" } +check_addr_not_exist() +{ + dev=$1 + addr=$2 + if ip addr show dev $dev | grep -q $addr; then + return 1 + else + return 0 + fi +} + kci_test_addrlft() { for i in $(seq 10 100) ;do @@ -298,9 +311,8 @@ kci_test_addrlft() run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) done - sleep 5 - run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" - if [ $? -eq 0 ]; then + slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." + if [ $? -eq 1 ]; then check_err 1 end_test "FAIL: preferred_lft addresses remaining" return @@ -561,6 +573,41 @@ kci_test_macsec() end_test "PASS: macsec" } +# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock. +# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance +# lock. +# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/ +kci_test_macsec_vlan() +{ + msname="test_macsec1" + vlanname="test_vlan1" + local ret=0 + run_cmd_grep "^Usage: ip macsec" ip macsec help + if [ $? 
-ne 0 ]; then + end_test "SKIP: macsec: iproute2 too old" + return $ksft_skip + fi + run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on + if [ $ret -ne 0 ];then + end_test "FAIL: can't add macsec interface, skipping test" + return 1 + fi + + run_cmd ip link set dev "$msname" up + ip link add link "$msname" name "$vlanname" type vlan id 1 + ip link set dev "$vlanname" address 00:11:22:33:44:88 + ip link set dev "$vlanname" up + run_cmd ip link del dev "$vlanname" + run_cmd ip link del dev "$msname" + + if [ $ret -ne 0 ];then + end_test "FAIL: macsec_vlan" + return 1 + fi + + end_test "PASS: macsec_vlan" +} + #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ @@ -673,6 +720,11 @@ kci_test_ipsec_offload() sysfsf=$sysfsd/ipsec sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + esp4_offload_probed_default=false + + if lsmod | grep -q esp4_offload; then + esp4_offload_probed_default=true + fi if ! mount | grep -q debugfs; then mount -t debugfs none /sys/kernel/debug/ &> /dev/null @@ -766,6 +818,7 @@ EOF fi # clean up any leftovers + ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim @@ -1334,6 +1387,39 @@ kci_test_mngtmpaddr() return $ret } +kci_test_operstate() +{ + local ret=0 + + # Check that it is possible to set operational state during device + # creation and that it is preserved when the administrative state of + # the device is toggled. 
+ run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UP" ip link show dev vx0 + run_cmd ip link set dev vx0 down + run_cmd_grep "state DOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + # Check that it is possible to set the operational state of the device + # after creation. + run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UNKNOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 state up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + if [ "$ret" -ne 0 ]; then + end_test "FAIL: operstate" + return 1 + fi + + end_test "PASS: operstate" +} + kci_test_rtnl() { local current_test diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh new file mode 100755 index 000000000000..3f9780232bd6 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink_notification.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking rtnetlink notification callpaths, and get as much +# coverage as possible. +# +# set -e + +ALL_TESTS=" + kci_test_mcast_addr_notification + kci_test_anycast_addr_notification +" + +source lib.sh +test_dev="test-dummy1" + +kci_test_mcast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor maddr > $tmpfile & + monitor_pid=$! + defer kill_process "$monitor_pid" + + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "mcast addr notification: iproute2 too old" + return $RET + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + ip link del dev "$test_dev" + check_err $? 
"Failed to delete dummy interface" + sleep 1 + + # There should be 4 line matches as follows. + # 13: test-dummy1  inet6 mcast ff02::1 scope global + # 13: test-dummy1  inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1  inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1  inet6 mcast ff02::1 scope global + match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile") + if [ "$match_result" -ne 4 ]; then + RET=$ksft_fail + fi + log_test "mcast addr notification: Expected 4 matches, got $match_result" + return $RET +} + +kci_test_anycast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor acaddress > "$tmpfile" & + monitor_pid=$! + defer kill_process "$monitor_pid" + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "anycast addr notification: iproute2 too old" + return "$RET" + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + sysctl -qw net.ipv6.conf."$test_dev".forwarding=1 + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 2 line matches as follows. 
+ # 9: dummy2 inet6 any fe80:: scope global + # Deleted 9: dummy2 inet6 any fe80:: scope global + match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile") + if [ "$match_result" -ne 2 ]; then + RET=$ksft_fail + fi + log_test "anycast addr notification: Expected 2 matches, got $match_result" + return "$RET" +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + RET=$ksft_skip + log_test "need root privileges" + exit $RET +fi + +require_command ip + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh index ba730655a7bf..4bc135e5c22c 100755 --- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -594,7 +594,7 @@ setup_rt_local_sids() dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index 4b86040c58c6..34b781a2ae74 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -72,6 +72,9 @@ # Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in # the selftest network. # +# In addition, every router interface connecting rt-x to rt-y is assigned an +# IPv6 link-local address fe80::x:y/64. 
+# # Local SID/C-SID table # ===================== # @@ -521,6 +524,9 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + ip -netns "${nsname}" addr \ + add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad + ip -netns "${nsname}" link set "${devname}" up done @@ -609,6 +615,27 @@ set_end_x_nextcsid() nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" } +set_end_x_ll_nextcsid() +{ + local rt="$1" + local adj="$2" + + eval nsname=\${$(get_rtname "${rt}")} + lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + nh6_ll_addr="fe80::${adj}:${rt}" + oifname="veth-rt-${rt}-${adj}" + + # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop + # address (note that "dev" is the dummy dum0 device chosen for the sake + # of simplicity). + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.X nh6 "${nh6_ll_addr}" \ + oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \ + nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" +} + set_underlay_sids_reachability() { local rt="$1" @@ -654,7 +681,7 @@ setup_rt_local_sids() set_underlay_sids_reachability "${rt}" "${rt_neighs}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -1016,6 +1043,27 @@ host_vpn_tests() check_and_log_hs_ipv4_connectivity 1 2 check_and_log_hs_ipv4_connectivity 2 1 + + # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local + # addresses. 
+ # - rt-3 SRv6 End.X adjacency with rt-4 + # - rt-4 SRv6 End.X adjacency with rt-1 + set_end_x_ll_nextcsid 3 4 + set_end_x_ll_nextcsid 4 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 + + # Restore the previous adjacencies. + set_end_x_nextcsid 3 4 + set_end_x_nextcsid 4 1 } __nextcsid_end_x_behavior_test() diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 3efce1718c5f..6a68c7eff1dc 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -395,7 +395,7 @@ setup_rt_local_sids() dev "${VRF_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh index cabc70538ffe..0979b5316fdf 100755 --- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -343,7 +343,7 @@ setup_rt_local_sids() encap seg6local action End dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behaviors instaces are grouped together in the 'localsid' + # Endpoint behaviors instances are grouped together in the 'localsid' # table. 
ip -netns "${nsname}" -6 rule add \ to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index f00245263b20..6478da6a71c3 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Check that after SEQ number wrap-around: * 1. SEQ-extension has upper bytes set - * 2. TCP conneciton is alive and no TCPAOBad segments + * 2. TCP connection is alive and no TCPAOBad segments * In order to test (2), the test doesn't just adjust seq number for a queue * on a connected socket, but migrates it to another sk+port number, so * that there won't be any delayed packets that will fail to verify diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh new file mode 100755 index 000000000000..388056472b5b --- /dev/null +++ b/tools/testing/selftests/net/test_neigh.sh @@ -0,0 +1,366 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + extern_valid_ipv4 + extern_valid_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? 
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Setup + +setup() +{ + set -e + + setup_ns ns1 ns2 + + ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2" + ip -n "$ns1" link set dev veth0 up + ip -n "$ns2" link set dev veth1 up + + ip -n "$ns1" address add 192.0.2.1/24 dev veth0 + ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad + ip -n "$ns2" address add 192.0.2.2/24 dev veth1 + ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad + + ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + + sleep 5 + + set +e +} + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +extern_valid_common() +{ + local af_str=$1; shift + local ip_addr=$1; shift + local tbl_name=$1; shift + local subnet=$1; shift + local mac + + mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]') + + RET=0 + + # Check that simple addition works. + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "No \"extern_valid\" flag after addition" + + log_test "$af_str \"extern_valid\" flag: Add entry" + + RET=0 + + # Check that an entry cannot be added with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Add with an invalid state" + + RET=0 + + # Check that entry cannot be added with both "extern_valid" flag and + # "use" / "managed" flag. 
+ run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag" + + log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag" + + RET=0 + + # Check that "extern_valid" flag can be toggled using replace. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Did not manage to set \"extern_valid\" flag with replace" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_fail $? "Did not manage to clear \"extern_valid\" flag with replace" + + log_test "$af_str \"extern_valid\" flag: Replace entry" + + RET=0 + + # Check that an existing "extern_valid" entry can be marked as + # "managed". + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed" + check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry" + + log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag" + + RET=0 + + # Check that entry cannot be replaced with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid" + check_fail $? 
"Managed to replace an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Replace with an invalid state" + + RET=0 + + # Check that an "extern_valid" entry is flushed when the interface is + # put administratively down. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 link set dev veth0 down" + run_cmd "ip -n $ns1 link set dev veth0 up" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_fail $? "\"extern_valid\" entry not flushed upon interface down" + + log_test "$af_str \"extern_valid\" flag: Interface down" + + RET=0 + + # Check that an "extern_valid" entry is not flushed when the interface + # loses its carrier. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns2 link set dev veth1 down" + run_cmd "ip -n $ns2 link set dev veth1 up" + run_cmd "sleep 2" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_err $? "\"extern_valid\" entry flushed upon carrier down" + + log_test "$af_str \"extern_valid\" flag: Carrier down" + + RET=0 + + # Check that when entry transitions to "reachable" state it maintains + # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request / + # NS to be sent. + local delay_probe + + delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]') + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + run_cmd "sleep $((delay_probe / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\"" + check_err $? 
"Entry did not transition to \"reachable\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state" + + log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state" + + RET=0 + + # Drop all packets, trigger resolution and check that entry goes back + # to "stale" state instead of "failed". + local mcast_reprobes + local retrans_time + local ucast_probes + local app_probes + local probes + local delay + + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "tc -n $ns2 qdisc add dev veth1 clsact" + run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]') + ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]') + app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]') + mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]') + delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time)) + run_cmd "sleep $((delay / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\"" + check_err $? "Entry did not return to \"stale\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? 
"Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state" + probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]') + if [[ $probes -eq 0 ]]; then + check_err 1 "No probes were sent" + fi + + log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state" + + run_cmd "tc -n $ns2 qdisc del dev veth1 clsact" + + RET=0 + + # Forced garbage collection runs whenever the number of entries is + # larger than "thresh3" and deletes stale entries that have not been + # updated in the last 5 seconds. + # + # Check that an "extern_valid" entry survives a forced garbage + # collection. Add an entry, wait 5 seconds and add more entries than + # "thresh3" so that forced garbage collection will run. + # + # Note that the garbage collection thresholds are global resources and + # that changes in the initial namespace affect all the namespaces. + local forced_gc_runs_t0 + local forced_gc_runs_t1 + local orig_thresh1 + local orig_thresh2 + local orig_thresh3 + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]') + orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]') + run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + run_cmd "sleep 5" + forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + if [[ 
$forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then + check_err 1 "Forced garbage collection did not run" + fi + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection" + + log_test "$af_str \"extern_valid\" flag: Forced garbage collection" + + run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1" + + RET=0 + + # Periodic garbage collection runs every "base_reachable"/2 seconds and + # if the number of entries is larger than "thresh1", then it deletes + # stale entries that have not been used in the last "gc_stale" seconds. + # + # Check that an "extern_valid" entry survives a periodic garbage + # collection. Add an "extern_valid" entry, add more than "thresh1" + # regular entries, wait "base_reachable" (longer than "gc_stale") + # seconds and check that the "extern_valid" entry was not deleted. + # + # Note that the garbage collection thresholds and "base_reachable" are + # global resources and that changes in the initial namespace affect all + # the namespaces. + local periodic_gc_runs_t0 + local periodic_gc_runs_t1 + local orig_base_reachable + local orig_gc_stale + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]') + run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000" + orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]') + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000" + # Wait orig_base_reachable/2 for the new interval to take effect. 
+ run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + run_cmd "sleep 10" + periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]] + check_err $? "Periodic garbage collection did not run" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection" + + log_test "$af_str \"extern_valid\" flag: Periodic garbage collection" + + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale" + run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable" +} + +extern_valid_ipv4() +{ + extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2. 
+} + +extern_valid_ipv6() +{ + extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1:: +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command jq + +if ! ip neigh help 2>&1 | grep -q "extern_valid"; then + echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup; $t; cleanup_all_ns; +done diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 6127a78ee988..8deacc565afa 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -146,18 +146,17 @@ run_cmd() } check_hv_connectivity() { - ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null - sleep 1 - ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" + slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" + slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 
0 "VM connectivity over $1 (ipv6 default rdst)" } diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index e9c2f71da207..ce34cb2e6e0b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -275,7 +275,7 @@ setup_sym() # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } setup_asym() @@ -370,7 +370,7 @@ setup_asym() ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } check_connectivity() diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index edc08a4433fd..ed1e2886ba3c 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -120,6 +120,7 @@ static void usage(char *progname) " -c query the ptp clock's capabilities\n" " -d name device to open\n" " -e val read 'val' external time stamp events\n" + " -E val enable rising (1), falling (2), or both (3) edges\n" " -f val adjust the ptp clock frequency by 'val' ppb\n" " -F chan Enable single channel mask and keep device open for debugfs verification.\n" " -g get the ptp clock time\n" @@ -178,6 +179,7 @@ int main(int argc, char *argv[]) int adjphase = 0; int capabilities = 0; int extts = 0; + int edge = 0; int flagtest = 0; int gettime = 0; int index = 0; @@ -202,7 +204,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 
1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:E:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) { switch (c) { case 'c': capabilities = 1; @@ -213,6 +215,11 @@ int main(int argc, char *argv[]) case 'e': extts = atoi(optarg); break; + case 'E': + edge = atoi(optarg); + edge = (edge & 1 ? PTP_RISING_EDGE : 0) | + (edge & 2 ? PTP_FALLING_EDGE : 0); + break; case 'f': adjfreq = atoi(optarg); break; @@ -444,7 +451,7 @@ int main(int argc, char *argv[]) if (!readonly) { memset(&extts_request, 0, sizeof(extts_request)); extts_request.index = index; - extts_request.flags = PTP_ENABLE_FEATURE; + extts_request.flags = PTP_ENABLE_FEATURE | edge; if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { perror("PTP_EXTTS_REQUEST"); extts = 0; diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 11f8d232b0ee..3edfd064ef81 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -44,7 +44,7 @@ fi ncpus="`getconf _NPROCESSORS_ONLN`" make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1 retval=$? 
-if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out +if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|ERROR|Error|error:|warning:" || grep -E -q "Stop|ERROR|Error|error:" < $resdir/Make.out then echo Kernel build error grep -E "Stop|Error|error:|warning:" < $resdir/Make.out diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 42e5e8597a1a..617cba339d28 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -199,7 +199,7 @@ do fi ;; --kconfig|--kconfigs) - checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$' + checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \+\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)* *$' '^error$' TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; @@ -442,18 +442,7 @@ echo $scriptname $args touch $resdir/$ds/log echo $scriptname $args >> $resdir/$ds/log echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite -echo Build directory: `pwd` > $resdir/$ds/testid.txt -if test -d .git -then - echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt - git status >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt - git diff HEAD >> $resdir/$ds/testid.txt -fi +mktestid.sh $resdir/$ds ___EOF___ kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk diff --git 
a/tools/testing/selftests/rcutorture/bin/mktestid.sh b/tools/testing/selftests/rcutorture/bin/mktestid.sh new file mode 100755 index 000000000000..16f9907a4dae --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/mktestid.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Create a testid.txt file in the specified directory. +# +# Usage: mktestid.sh dirpath +# +# Copyright (C) Meta Platforms, Inc. 2025 +# +# Author: Paul E. McKenney <paulmck@kernel.org> + +resdir="$1" +if test -z "${resdir}" || ! test -d "${resdir}" || ! test -w "${resdir}" +then + echo Path '"'${resdir}'"' not writeable directory, no ${resdir}/testid.txt. + exit 1 +fi +echo Build directory: `pwd` > ${resdir}/testid.txt +if test -d .git +then + echo Current commit: `git rev-parse HEAD` >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo ' ---' Output of "'"git status"'": >> ${resdir}/testid.txt + git status >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo ' ---' Output of "'"git diff HEAD"'": >> ${resdir}/testid.txt + git diff HEAD >> ${resdir}/testid.txt +fi diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index e03fdaca89b3..611bc03a8dc7 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -30,6 +30,15 @@ then VERBOSE_BATCH_CPUS=0 fi +# Machine architecture? ("uname -p" is said to be less portable.) +thisarch="`uname -m`" +if test "${thisarch}" = aarch64 +then + ifnotaarch64=no +else + ifnotaarch64=yes +fi + # Configurations/scenarios.
configs_rcutorture= configs_locktorture= @@ -55,9 +64,9 @@ do_normal=yes explicit_normal=no do_kasan=yes do_kcsan=no -do_clocksourcewd=yes +do_clocksourcewd="${ifnotaarch64}" do_rt=yes -do_rcutasksflavors=yes +do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided do_srcu_lockdep=yes do_rcu_rust=no @@ -124,7 +133,7 @@ do ;; --do-all|--doall) do_allmodconfig=yes - do_rcutasksflavor=yes + do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided do_rcutorture=yes do_locktorture=yes do_scftorture=yes @@ -136,7 +145,7 @@ do explicit_normal=no do_kasan=yes do_kcsan=yes - do_clocksourcewd=yes + do_clocksourcewd="${ifnotaarch64}" do_srcu_lockdep=yes ;; --do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig) @@ -274,7 +283,7 @@ then configs_rcutorture=CFLIST fi duration_rcutorture=$((duration_base*duration_rcutorture_frac/10)) -if test "$duration_rcutorture" -eq 0 +if test "$duration_rcutorture" -eq 0 && test "$do_locktorture" = "yes" then echo " --- Zero time for rcutorture, disabling" | tee -a $T/log do_rcutorture=no @@ -286,7 +295,7 @@ then configs_locktorture=CFLIST fi duration_locktorture=$((duration_base*duration_locktorture_frac/10)) -if test "$duration_locktorture" -eq 0 +if test "$duration_locktorture" -eq 0 && test "$do_locktorture" = "yes" then echo " --- Zero time for locktorture, disabling" | tee -a $T/log do_locktorture=no @@ -298,12 +307,19 @@ then configs_scftorture=CFLIST fi duration_scftorture=$((duration_base*duration_scftorture_frac/10)) -if test "$duration_scftorture" -eq 0 +if test "$duration_scftorture" -eq 0 && test "$do_scftorture" = "yes" then echo " --- Zero time for scftorture, disabling" | tee -a $T/log do_scftorture=no fi +# CONFIG_EXPERT=y is currently required for arm64 KCSAN runs. 
+kcsan_expert= +if test "${thisarch}" = aarch64 +then + kcsan_expert="CONFIG_EXPERT=y" +fi + touch $T/failures touch $T/successes @@ -362,13 +378,19 @@ function torture_set { then curflavor=$flavor torture_one "$@" - mv $T/last-resdir $T/last-resdir-nodebug || : + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-nodebug || : + fi fi if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan torture_one "$@" --kasan - mv $T/last-resdir $T/last-resdir-kasan || : + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-kasan || : + fi fi if test "$do_kcsan" = "yes" then @@ -378,8 +400,16 @@ function torture_set { kcsan_kmake_tag="--kmake-args" cur_kcsan_kmake_args="$kcsan_kmake_args" fi - torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan - mv $T/last-resdir $T/last-resdir-kcsan || : + chk_rdr_state= + if test "${flavor}" = rcutorture + then + chk_rdr_state="CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y" + fi + torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y ${kcsan_expert} ${chk_rdr_state}" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-kcsan || : + fi fi } @@ -389,6 +419,7 @@ then echo " --- allmodconfig:" Start `date` | tee -a $T/log amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig" mkdir -p "$amcdir" + mktestid.sh "$amcdir" echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 retcode=$? @@ -407,6 +438,10 @@ then make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 retcode="$?" 
echo $retcode > "$amcdir/Make.exitcode" + if grep -E -q "Stop|ERROR|Error|error:|warning:" < "$amcdir/Make.out" + then + retcode=99 + fi buildphase='"make"' fi if test "$retcode" -eq 0 @@ -495,6 +530,7 @@ then echo " --- do-rcu-rust:" Start `date` | tee -a $T/log rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust" mkdir -p "$rrdir" + mktestid.sh "$rrdir" echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out make LLVM=1 rustavailable > $T/rustavailable.out 2>&1 retcode=$? @@ -681,7 +717,14 @@ nfailures=0 echo FAILURES: | tee -a $T/log if test -s "$T/failures" then - awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum" + awk < "$T/failures" -v sq="'" ' + { + print "echo " sq $0 sq; + if ($2 != "") + print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; + else + print "echo " sq " " sq "Run failed to produce results directory."; + }' | sh | tee -a $T/log | tee "$T/failuresum" nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" grep "^ Summary: " "$T/failuresum" | grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan" @@ -691,15 +734,18 @@ then fi ret=2 fi -if test "$do_kcsan" = "yes" +if test "$do_kcsan" = "yes" && test -e tools/testing/selftests/rcutorture/res/$ds then TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum fi echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. 
| tee -a $T/log -tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" -find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors -find "$tdir" -name 'Make.out.diags' -print > $T/builderrors +tdir="`cat $T/successes $T/failures | awk 'NF > 1 { print $NF }' | head -1 | sed -e 's,/[^/]\+/*$,,'`" +if test -n "$tdir" +then + find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors + find "$tdir" -name 'Make.out.diags' -print > $T/builderrors +fi if test -s "$T/configerrors" then echo " Scenarios with .config errors: `wc -l "$T/configerrors" | awk '{ print $1 }'`" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED index 48d8a245c7fa..7d75f4b94943 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED +++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED @@ -5,3 +5,6 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y +CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y +CONFIG_RCU_TORTURE_TEST_LOG_CPU=y +CONFIG_RCU_TORTURE_TEST_LOG_GP=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index 45f572570a8c..98b6175e5aa0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -5,7 +5,6 @@ TREE04 TREE05 TREE07 TREE09 -SRCU-L SRCU-N SRCU-P SRCU-T diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L deleted file mode 100644 index 3b4fa8dbef8a..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L +++ /dev/null @@ -1,10 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_SMP=y -CONFIG_NR_CPUS=6 -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_RCU_EXPERT=n -CONFIG_KPROBES=n -CONFIG_FTRACE=n diff --git 
a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot deleted file mode 100644 index 0207b3138c5b..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot +++ /dev/null @@ -1,3 +0,0 @@ -rcutorture.torture_type=srcu -rcutorture.reader_flavor=0x4 -rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index a10350c8a46e..b2d8bd9026a7 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 # Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org> diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index db176fe7d0c3..c20aa16b1d63 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -21,6 +21,7 @@ CONFIG_NF_NAT=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NET_SCHED=y +CONFIG_IP_SET=m # # Queueing/Scheduling @@ -30,6 +31,7 @@ CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_FQ_CODEL=m diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index c6db7fa94f55..23a61e5b99d0 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -504,7 +504,6 @@ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", - "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", "$TC filter add dev 
$DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", @@ -517,8 +516,8 @@ { "kind": "hfsc", "handle": "1:", - "bytes": 392, - "packets": 4 + "bytes": 294, + "packets": 3 } ], "matchCount": "1", diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json new file mode 100644 index 000000000000..cd1f2ee8f354 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json @@ -0,0 +1,254 @@ +[ + { + "id": "a4c7", + "name": "Create DualPI2 with default setting", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* step_thresh 1ms min_qlen_step 0p coupling_factor 2 drop_on_overload drop_dequeue classic_protection 10% l4s_ect split_gso", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "1ea4", + "name": "Create DualPI2 with memlimit", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 memlimit 20000000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* memlimit 20000000B", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "2130", + "name": "Create DualPI2 with typical_rtt and max_rtt", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 typical_rtt 
20ms max_rtt 200ms", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 20ms tupdate 20ms alpha 0.042969 beta 1.496094", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "90c1", + "name": "Create DualPI2 with max_rtt", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 max_rtt 300ms", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 50ms tupdate 50ms alpha 0.050781 beta 0.996094", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "7b3c", + "name": "Create DualPI2 with any_ect option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 any_ect", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* any_ect", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "49a3", + "name": "Create DualPI2 with overflow option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 overflow", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* overflow", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "d0a1", + "name": "Create DualPI2 with drop_enqueue option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + 
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 drop_enqueue", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* drop_enqueue", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "f051", + "name": "Create DualPI2 with no_split_gso option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 no_split_gso", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* no_split_gso", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "456b", + "name": "Create DualPI2 with packet step_thresh", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 step_thresh 3p", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* step_thresh 3p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "610c", + "name": "Create DualPI2 with packet min_qlen_step", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 min_qlen_step 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* min_qlen_step 1p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "b4fa", + "name": "Create DualPI2 with packet coupling_factor", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": 
"nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 coupling_factor 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* coupling_factor 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "37f1", + "name": "Create DualPI2 with packet classic_protection", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 classic_protection 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* classic_protection 0%", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json index 3c4444961488..718d2df2aafa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json @@ -336,5 +336,86 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "d34d", + "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" + ], + "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "2", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": "b33f", + "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change 
non-root", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" + ], + "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "2", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": "cafe", + "name": "NETEM test qdisc duplication restriction in qdisc tree", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": "1337", + "name": "NETEM test qdisc duplication restriction in qdisc tree across branches", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 28c6ce6da7db..531a2f6e4900 100644 --- 
a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -264,5 +264,41 @@ "matchPattern": "sfq", "matchCount": "0", "teardown": [] + }, + { + "id": "cdc1", + "name": "Check that a negative perturb timer is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb -10", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] + }, + { + "id": "a9f0", + "name": "Check that a too big perturb timer is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb 1000000000", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 589b18ed758a..dae19687912d 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -4,8 +4,7 @@ # If a module is required and was not compiled # the test that requires it will fail anyways try_modprobe() { - modprobe -q -R "$1" - if [ $? -ne 0 ]; then + if ! modprobe -q -R "$1"; then echo "Module $1 not found... skipping." 
else modprobe "$1" @@ -67,4 +66,5 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -J`nproc` +try_modprobe sch_dualpi2 +./tdc.py -J"$(nproc)" diff --git a/tools/testing/selftests/vsock/.gitignore b/tools/testing/selftests/vsock/.gitignore new file mode 100644 index 000000000000..9c5bf379480f --- /dev/null +++ b/tools/testing/selftests/vsock/.gitignore @@ -0,0 +1,2 @@ +vmtest.log +vsock_test diff --git a/tools/testing/selftests/vsock/Makefile b/tools/testing/selftests/vsock/Makefile new file mode 100644 index 000000000000..c407c0afd938 --- /dev/null +++ b/tools/testing/selftests/vsock/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +CURDIR := $(abspath .) +TOOLSDIR := $(abspath ../../..) +VSOCK_TEST_DIR := $(TOOLSDIR)/testing/vsock +VSOCK_TEST_SRCS := $(wildcard $(VSOCK_TEST_DIR)/*.c $(VSOCK_TEST_DIR)/*.h) + +$(OUTPUT)/vsock_test: $(VSOCK_TEST_DIR)/vsock_test + install -m 755 $< $@ + +$(VSOCK_TEST_DIR)/vsock_test: $(VSOCK_TEST_SRCS) + $(MAKE) -C $(VSOCK_TEST_DIR) vsock_test +TEST_PROGS += vmtest.sh +TEST_GEN_FILES := vsock_test + +include ../lib.mk + diff --git a/tools/testing/selftests/vsock/config b/tools/testing/selftests/vsock/config new file mode 100644 index 000000000000..5f0a4f17dfc9 --- /dev/null +++ b/tools/testing/selftests/vsock/config @@ -0,0 +1,111 @@ +CONFIG_BLK_DEV_INITRD=y +CONFIG_BPF=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_BPF_EVENTS=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_DYNAMIC_FTRACE=y +CONFIG_HAVE_KPROBES=y +CONFIG_KPROBES=y +CONFIG_KPROBE_EVENTS=y +CONFIG_ARCH_SUPPORTS_UPROBES=y +CONFIG_UPROBES=y +CONFIG_UPROBE_EVENTS=y +CONFIG_DEBUG_FS=y +CONFIG_FW_CFG_SYSFS=y +CONFIG_FW_CFG_SYSFS_CMDLINE=y +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM_VIRTIO_GPU_KMS=y +CONFIG_DRM_BOCHS=y +CONFIG_VIRTIO_IOMMU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_PCI=y +CONFIG_SND_INTEL8X0=y 
+CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SECURITYFS=y +CONFIG_CGROUP_BPF=y +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_XZ=y +CONFIG_SQUASHFS_ZSTD=y +CONFIG_FUSE_FS=y +CONFIG_VIRTIO_FS=y +CONFIG_SERIO=y +CONFIG_PCI=y +CONFIG_INPUT=y +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_X86_VERBOSE_BOOTUP=y +CONFIG_VGA_CONSOLE=y +CONFIG_FB=y +CONFIG_FB_VESA=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_DRV_CMOS=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_KVM=y +CONFIG_KVM_INTEL=y +CONFIG_KVM_AMD=y +CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_DIAG=y +CONFIG_VSOCKETS_LOOPBACK=y +CONFIG_VMWARE_VMCI_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y +CONFIG_HYPERV_VSOCKETS=y +CONFIG_VMWARE_VMCI=y +CONFIG_VHOST_VSOCK=y +CONFIG_HYPERV=y +CONFIG_UEVENT_HELPER=n +CONFIG_VIRTIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_NET=y +CONFIG_NET_CORE=y +CONFIG_NETDEVICES=y +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_INET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_9P_FS=y +CONFIG_VIRTIO_NET=y +CONFIG_CMDLINE_OVERRIDE=n +CONFIG_BINFMT_SCRIPT=y +CONFIG_SHMEM=y +CONFIG_TMPFS=y +CONFIG_UNIX=y +CONFIG_MODULE_SIG_FORCE=n +CONFIG_DEVTMPFS=y +CONFIG_TTY=y +CONFIG_VT=y +CONFIG_UNIX98_PTYS=y +CONFIG_EARLY_PRINTK=y +CONFIG_INOTIFY_USER=y +CONFIG_BLOCK=y +CONFIG_SCSI_LOWLEVEL=y +CONFIG_SCSI=y +CONFIG_SCSI_VIRTIO=y +CONFIG_BLK_DEV_SD=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_CORE=y +CONFIG_I6300ESB_WDT=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_OVERLAY_FS=y +CONFIG_DAX=y +CONFIG_DAX_DRIVER=y +CONFIG_FS_DAX=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_ZONE_DEVICE=y diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings new file mode 100644 index 000000000000..694d70710ff0 --- /dev/null +++ b/tools/testing/selftests/vsock/settings @@ -0,0 +1 @@ 
+timeout=300 diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh new file mode 100755 index 000000000000..edacebfc1632 --- /dev/null +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -0,0 +1,487 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Meta Platforms, Inc. and affiliates +# +# Dependencies: +# * virtme-ng +# * busybox-static (used by virtme-ng) +# * qemu (used by virtme-ng) + +readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" +readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../) + +source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh + +readonly VSOCK_TEST="${SCRIPT_DIR}"/vsock_test +readonly TEST_GUEST_PORT=51000 +readonly TEST_HOST_PORT=50000 +readonly TEST_HOST_PORT_LISTENER=50001 +readonly SSH_GUEST_PORT=22 +readonly SSH_HOST_PORT=2222 +readonly VSOCK_CID=1234 +readonly WAIT_PERIOD=3 +readonly WAIT_PERIOD_MAX=60 +readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX )) +readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid) + +# virtme-ng offers a netdev for ssh when using "--ssh", but we also need a +# control port forwarded for vsock_test. Because virtme-ng doesn't support +# adding an additional port to forward to the device created from "--ssh" and +# virtme-init mistakenly sets identical IPs to the ssh device and additional +# devices, we instead opt out of using --ssh, add the device manually, and also +# add the kernel cmdline options that virtme-init uses to setup the interface. 
+readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}" +readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}" +readonly QEMU_OPTS="\ + -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \ + -device virtio-net-pci,netdev=n0 \ + -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \ + --pidfile ${QEMU_PIDFILE} \ +" +readonly KERNEL_CMDLINE="\ + virtme.dhcp net.ifnames=0 biosdevname=0 \ + virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \ +" +readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log) +readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback) +readonly TEST_DESCS=( + "Run vsock_test in server mode on the VM and in client mode on the host." + "Run vsock_test in client mode on the VM and in server mode on the host." + "Run vsock_test using the loopback transport in the VM." +) + +VERBOSE=0 + +usage() { + local name + local desc + local i + + echo + echo "$0 [OPTIONS] [TEST]..." + echo "If no TEST argument is given, all tests will be run." + echo + echo "Options" + echo " -b: build the kernel from the current source tree and use it for guest VMs" + echo " -q: set the path to or name of qemu binary" + echo " -v: verbose output" + echo + echo "Available tests" + + for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do + name=${TEST_NAMES[${i}]} + desc=${TEST_DESCS[${i}]} + printf "\t%-35s%-35s\n" "${name}" "${desc}" + done + echo + + exit 1 +} + +die() { + echo "$*" >&2 + exit "${KSFT_FAIL}" +} + +vm_ssh() { + ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@" + return $? +} + +cleanup() { + if [[ -s "${QEMU_PIDFILE}" ]]; then + pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1 + fi + + # If failure occurred during or before qemu start up, then we need + # to clean this up ourselves. 
+ if [[ -e "${QEMU_PIDFILE}" ]]; then + rm "${QEMU_PIDFILE}" + fi +} + +check_args() { + local found + + for arg in "$@"; do + found=0 + for name in "${TEST_NAMES[@]}"; do + if [[ "${name}" = "${arg}" ]]; then + found=1 + break + fi + done + + if [[ "${found}" -eq 0 ]]; then + echo "${arg} is not an available test" >&2 + usage + fi + done + + for arg in "$@"; do + if ! command -v > /dev/null "test_${arg}"; then + echo "Test ${arg} not found" >&2 + usage + fi + done +} + +check_deps() { + for dep in vng ${QEMU} busybox pkill ssh; do + if [[ ! -x $(command -v "${dep}") ]]; then + echo -e "skip: dependency ${dep} not found!\n" + exit "${KSFT_SKIP}" + fi + done + + if [[ ! -x $(command -v "${VSOCK_TEST}") ]]; then + printf "skip: %s not found!" "${VSOCK_TEST}" + printf " Please build the kselftest vsock target.\n" + exit "${KSFT_SKIP}" + fi +} + +check_vng() { + local tested_versions + local version + local ok + + tested_versions=("1.33" "1.36") + version="$(vng --version)" + + ok=0 + for tv in "${tested_versions[@]}"; do + if [[ "${version}" == *"${tv}"* ]]; then + ok=1 + break + fi + done + + if [[ ! "${ok}" -eq 1 ]]; then + printf "warning: vng version '%s' has not been tested and may " "${version}" >&2 + printf "not function properly.\n\tThe following versions have been tested: " >&2 + echo "${tested_versions[@]}" >&2 + fi +} + +handle_build() { + if [[ ! "${BUILD}" -eq 1 ]]; then + return + fi + + if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then + echo "-b requires vmtest.sh called from the kernel source tree" >&2 + exit 1 + fi + + pushd "${KERNEL_CHECKOUT}" &>/dev/null + + if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then + die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})" + fi + + if ! 
make -j$(nproc); then + die "failed to build kernel from source tree (${KERNEL_CHECKOUT})" + fi + + popd &>/dev/null +} + +vm_start() { + local logfile=/dev/null + local verbose_opt="" + local kernel_opt="" + local qemu + + qemu=$(command -v "${QEMU}") + + if [[ "${VERBOSE}" -eq 1 ]]; then + verbose_opt="--verbose" + logfile=/dev/stdout + fi + + if [[ "${BUILD}" -eq 1 ]]; then + kernel_opt="${KERNEL_CHECKOUT}" + fi + + vng \ + --run \ + ${kernel_opt} \ + ${verbose_opt} \ + --qemu-opts="${QEMU_OPTS}" \ + --qemu="${qemu}" \ + --user root \ + --append "${KERNEL_CMDLINE}" \ + --rw &> ${logfile} & + + if ! timeout ${WAIT_TOTAL} \ + bash -c 'while [[ ! -s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then + die "failed to boot VM" + fi +} + +vm_wait_for_ssh() { + local i + + i=0 + while true; do + if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then + die "Timed out waiting for guest ssh" + fi + if vm_ssh -- true; then + break + fi + i=$(( i + 1 )) + sleep ${WAIT_PERIOD} + done +} + +# derived from selftests/net/net_helper.sh +wait_for_listener() +{ + local port=$1 + local interval=$2 + local max_intervals=$3 + local protocol=tcp + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ "${protocol}" = "tcp" ] && pattern="${pattern}0A" + for i in $(seq "${max_intervals}"); do + if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \ + grep -q "${pattern}"; then + break + fi + sleep "${interval}" + done +} + +vm_wait_for_listener() { + local port=$1 + + vm_ssh <<EOF +$(declare -f wait_for_listener) +wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} +EOF +} + +host_wait_for_listener() { + wait_for_listener "${TEST_HOST_PORT_LISTENER}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}" +} + +__log_stdin() { + cat | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }' +} + +__log_args() { + echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }' +} + +log() { + local prefix="$1" + + shift + local redirect= + if 
[[ ${VERBOSE} -eq 0 ]]; then + redirect=/dev/null + else + redirect=/dev/stdout + fi + + if [[ "$#" -eq 0 ]]; then + __log_stdin | tee -a "${LOG}" > ${redirect} + else + __log_args "$@" | tee -a "${LOG}" > ${redirect} + fi +} + +log_setup() { + log "setup" "$@" +} + +log_host() { + local testname=$1 + + shift + log "test:${testname}:host" "$@" +} + +log_guest() { + local testname=$1 + + shift + log "test:${testname}:guest" "$@" +} + +test_vm_server_host_client() { + local testname="${FUNCNAME[0]#test_}" + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=server \ + --control-port="${TEST_GUEST_PORT}" \ + --peer-cid=2 \ + 2>&1 | log_guest "${testname}" & + + vm_wait_for_listener "${TEST_GUEST_PORT}" + + ${VSOCK_TEST} \ + --mode=client \ + --control-host=127.0.0.1 \ + --peer-cid="${VSOCK_CID}" \ + --control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}" + + return $? +} + +test_vm_client_host_server() { + local testname="${FUNCNAME[0]#test_}" + + ${VSOCK_TEST} \ + --mode "server" \ + --control-port "${TEST_HOST_PORT_LISTENER}" \ + --peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" & + + host_wait_for_listener + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=client \ + --control-host=10.0.2.2 \ + --peer-cid=2 \ + --control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}" + + return $? +} + +test_vm_loopback() { + local testname="${FUNCNAME[0]#test_}" + local port=60000 # non-forwarded local port + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=server \ + --control-port="${port}" \ + --peer-cid=1 2>&1 | log_guest "${testname}" & + + vm_wait_for_listener "${port}" + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=client \ + --control-host="127.0.0.1" \ + --control-port="${port}" \ + --peer-cid=1 2>&1 | log_guest "${testname}" + + return $? 
+} + +run_test() { + local host_oops_cnt_before + local host_warn_cnt_before + local vm_oops_cnt_before + local vm_warn_cnt_before + local host_oops_cnt_after + local host_warn_cnt_after + local vm_oops_cnt_after + local vm_warn_cnt_after + local name + local rc + + host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') + host_warn_cnt_before=$(dmesg --level=warn | wc -l) + vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') + vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l) + + name=$(echo "${1}" | awk '{ print $1 }') + eval test_"${name}" + rc=$? + + host_oops_cnt_after=$(dmesg | grep -i 'Oops' | wc -l) + if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then + echo "FAIL: kernel oops detected on host" | log_host "${name}" + rc=$KSFT_FAIL + fi + + host_warn_cnt_after=$(dmesg --level=warn | wc -l) + if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then + echo "FAIL: kernel warning detected on host" | log_host "${name}" + rc=$KSFT_FAIL + fi + + vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l) + if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then + echo "FAIL: kernel oops detected on vm" | log_host "${name}" + rc=$KSFT_FAIL + fi + + vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l) + if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then + echo "FAIL: kernel warning detected on vm" | log_host "${name}" + rc=$KSFT_FAIL + fi + + return "${rc}" +} + +QEMU="qemu-system-$(uname -m)" + +while getopts :hvsq:b o +do + case $o in + v) VERBOSE=1;; + b) BUILD=1;; + q) QEMU=$OPTARG;; + h|*) usage;; + esac +done +shift $((OPTIND-1)) + +trap cleanup EXIT + +if [[ ${#} -eq 0 ]]; then + ARGS=("${TEST_NAMES[@]}") +else + ARGS=("$@") +fi + +check_args "${ARGS[@]}" +check_deps +check_vng +handle_build + +echo "1..${#ARGS[@]}" + +log_setup "Booting up VM" +vm_start +vm_wait_for_ssh +log_setup "VM booted up" + +cnt_pass=0 +cnt_fail=0 +cnt_skip=0 +cnt_total=0 +for arg in "${ARGS[@]}"; do + run_test "${arg}" + rc=$? 
+ if [[ ${rc} -eq $KSFT_PASS ]]; then + cnt_pass=$(( cnt_pass + 1 )) + echo "ok ${cnt_total} ${arg}" + elif [[ ${rc} -eq $KSFT_SKIP ]]; then + cnt_skip=$(( cnt_skip + 1 )) + echo "ok ${cnt_total} ${arg} # SKIP" + elif [[ ${rc} -eq $KSFT_FAIL ]]; then + cnt_fail=$(( cnt_fail + 1 )) + echo "not ok ${cnt_total} ${arg} # exit=$rc" + fi + cnt_total=$(( cnt_total + 1 )) +done + +echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}" +echo "Log: ${LOG}" + +if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then + exit "$KSFT_PASS" +else + exit "$KSFT_FAIL" +fi diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index f314d3789f17..0a5381717e9f 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -16,9 +16,13 @@ CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=y CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index 6e0b4e95e230..88211fd132d2 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -5,6 +5,7 @@ vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_ze vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o vsock_perf: vsock_perf.o msg_zerocopy_common.o +vsock_test: LDLIBS = -lpthread vsock_uring_test: LDLIBS = -luring vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 0c7e9cbcbc85..7b861a8e997a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -7,6 +7,7 @@ * Author: 
Stefan Hajnoczi <stefanha@redhat.com> */ +#include <ctype.h> #include <errno.h> #include <stdio.h> #include <stdint.h> @@ -16,6 +17,7 @@ #include <unistd.h> #include <assert.h> #include <sys/epoll.h> +#include <sys/ioctl.h> #include <sys/mman.h> #include <linux/sockios.h> @@ -23,6 +25,9 @@ #include "control.h" #include "util.h" +#define KALLSYMS_PATH "/proc/kallsyms" +#define KALLSYMS_LINE_LEN 512 + /* Install signal handlers */ void init_signals(void) { @@ -97,39 +102,52 @@ void vsock_wait_remote_close(int fd) close(epollfd); } -/* Wait until transport reports no data left to be sent. - * Return false if transport does not implement the unsent_bytes() callback. +/* Wait until ioctl gives an expected int value. + * Return false if the op is not supported. */ -bool vsock_wait_sent(int fd) +bool vsock_ioctl_int(int fd, unsigned long op, int expected) { - int ret, sock_bytes_unsent; + int actual, ret; + char name[32]; + + snprintf(name, sizeof(name), "ioctl(%lu)", op); timeout_begin(TIMEOUT); do { - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + ret = ioctl(fd, op, &actual); if (ret < 0) { - if (errno == EOPNOTSUPP) + if (errno == EOPNOTSUPP || errno == ENOTTY) break; - perror("ioctl(SIOCOUTQ)"); + perror(name); exit(EXIT_FAILURE); } - timeout_check("SIOCOUTQ"); - } while (sock_bytes_unsent != 0); + timeout_check(name); + } while (actual != expected); timeout_end(); - return !ret; + return ret >= 0; } -/* Create socket <type>, bind to <cid, port> and return the file descriptor. */ -int vsock_bind(unsigned int cid, unsigned int port, int type) +/* Wait until transport reports no data left to be sent. + * Return false if transport does not implement the unsent_bytes() callback. + */ +bool vsock_wait_sent(int fd) +{ + return vsock_ioctl_int(fd, SIOCOUTQ, 0); +} + +/* Create socket <type>, bind to <cid, port>. + * Return the file descriptor, or -1 on error. 
+ */ +int vsock_bind_try(unsigned int cid, unsigned int port, int type) { struct sockaddr_vm sa = { .svm_family = AF_VSOCK, .svm_cid = cid, .svm_port = port, }; - int fd; + int fd, saved_errno; fd = socket(AF_VSOCK, type, 0); if (fd < 0) { @@ -138,6 +156,22 @@ int vsock_bind(unsigned int cid, unsigned int port, int type) } if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) { + saved_errno = errno; + close(fd); + errno = saved_errno; + fd = -1; + } + + return fd; +} + +/* Create socket <type>, bind to <cid, port> and return the file descriptor. */ +int vsock_bind(unsigned int cid, unsigned int port, int type) +{ + int fd; + + fd = vsock_bind_try(cid, port, type); + if (fd < 0) { perror("bind"); exit(EXIT_FAILURE); } @@ -836,3 +870,55 @@ void enable_so_linger(int fd, int timeout) exit(EXIT_FAILURE); } } + +static int __get_transports(void) +{ + char buf[KALLSYMS_LINE_LEN]; + const char *ksym; + int ret = 0; + FILE *f; + + f = fopen(KALLSYMS_PATH, "r"); + if (!f) { + perror("Can't open " KALLSYMS_PATH); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), f)) { + char *match; + int i; + + assert(buf[strlen(buf) - 1] == '\n'); + + for (i = 0; i < TRANSPORT_NUM; ++i) { + if (ret & BIT(i)) + continue; + + /* Match should be followed by '\t' or '\n'. + * See kallsyms.c:s_show(). + */ + ksym = transport_ksyms[i]; + match = strstr(buf, ksym); + if (match && isspace(match[strlen(ksym)])) { + ret |= BIT(i); + break; + } + } + } + + fclose(f); + return ret; +} + +/* Return integer with TRANSPORT_* bit set for every (known) registered vsock + * transport. 
+ */ +int get_transports(void) +{ + static int tr = -1; + + if (tr == -1) + tr = __get_transports(); + + return tr; +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index 5e2db67072d5..142c02a6834a 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -3,8 +3,40 @@ #define UTIL_H #include <sys/socket.h> +#include <linux/bitops.h> +#include <linux/kernel.h> #include <linux/vm_sockets.h> +/* All known vsock transports, see callers of vsock_core_register() */ +#define KNOWN_TRANSPORTS(x) \ + x(LOOPBACK, "loopback") \ + x(VIRTIO, "virtio") \ + x(VHOST, "vhost") \ + x(VMCI, "vmci") \ + x(HYPERV, "hvs") + +enum transport { + TRANSPORT_COUNTER_BASE = __COUNTER__ + 1, + #define x(name, symbol) \ + TRANSPORT_##name = BIT(__COUNTER__ - TRANSPORT_COUNTER_BASE), + KNOWN_TRANSPORTS(x) + TRANSPORT_NUM = __COUNTER__ - TRANSPORT_COUNTER_BASE, + #undef x +}; + +static const char * const transport_ksyms[] = { + #define x(name, symbol) "d " symbol "_transport", + KNOWN_TRANSPORTS(x) + #undef x +}; + +static_assert(ARRAY_SIZE(transport_ksyms) == TRANSPORT_NUM); +static_assert(BITS_PER_TYPE(int) >= TRANSPORT_NUM); + +#define TRANSPORTS_G2H (TRANSPORT_VIRTIO | TRANSPORT_VMCI | TRANSPORT_HYPERV) +#define TRANSPORTS_H2G (TRANSPORT_VHOST | TRANSPORT_VMCI) +#define TRANSPORTS_LOCAL (TRANSPORT_LOOPBACK) + /* Tests can either run as the client or the server */ enum test_mode { TEST_MODE_UNSET, @@ -44,6 +76,7 @@ int vsock_connect(unsigned int cid, unsigned int port, int type); int vsock_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_bind_try(unsigned int cid, unsigned int port, int type); int vsock_bind(unsigned int cid, unsigned int port, int type); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); @@ -54,6 +87,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port); int 
vsock_seqpacket_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp); void vsock_wait_remote_close(int fd); +bool vsock_ioctl_int(int fd, unsigned long op, int expected); bool vsock_wait_sent(int fd); void send_buf(int fd, const void *buf, size_t len, int flags, ssize_t expected_ret); @@ -81,4 +115,5 @@ void setsockopt_timeval_check(int fd, int level, int optname, struct timeval val, char const *errmsg); void enable_so_zerocopy_check(int fd); void enable_so_linger(int fd, int timeout); +int get_transports(void); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index f669baaa0dca..d4517386e551 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -22,6 +22,9 @@ #include <signal.h> #include <sys/ioctl.h> #include <linux/time64.h> +#include <pthread.h> +#include <fcntl.h> +#include <linux/sockios.h> #include "vsock_test_zerocopy.h" #include "timeout.h" @@ -1305,6 +1308,54 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) close(fd); } +static void test_unread_bytes_server(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int client_fd; + + client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type); + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + send_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf)); + control_writeln("SENT"); + + close(client_fd); +} + +static void test_unread_bytes_client(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int fd; + + fd = vsock_connect(opts->peer_cid, opts->peer_port, type); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + control_expectln("SENT"); + /* The data has arrived but has not been read. The expected is + * MSG_BUF_IOCTL_LEN. 
+ */ + if (!vsock_ioctl_int(fd, SIOCINQ, MSG_BUF_IOCTL_LEN)) { + fprintf(stderr, "Test skipped, SIOCINQ not supported.\n"); + goto out; + } + + recv_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + /* All data has been consumed, so the expected is 0. */ + vsock_ioctl_int(fd, SIOCINQ, 0); + +out: + close(fd); +} + static void test_stream_unsent_bytes_client(const struct test_opts *opts) { test_unsent_bytes_client(opts, SOCK_STREAM); @@ -1325,6 +1376,26 @@ static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts) test_unsent_bytes_server(opts, SOCK_SEQPACKET); } +static void test_stream_unread_bytes_client(const struct test_opts *opts) +{ + test_unread_bytes_client(opts, SOCK_STREAM); +} + +static void test_stream_unread_bytes_server(const struct test_opts *opts) +{ + test_unread_bytes_server(opts, SOCK_STREAM); +} + +static void test_seqpacket_unread_bytes_client(const struct test_opts *opts) +{ + test_unread_bytes_client(opts, SOCK_SEQPACKET); +} + +static void test_seqpacket_unread_bytes_server(const struct test_opts *opts) +{ + test_unread_bytes_server(opts, SOCK_SEQPACKET); +} + #define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) /* This define is the same as in 'include/linux/virtio_vsock.h': * it is used to decide when to send credit update message during @@ -1718,16 +1789,27 @@ static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) #define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */ -/* Test attempts to trigger a transport release for an unbound socket. This can - * lead to a reference count mishandling. - */ -static void test_stream_transport_uaf_client(const struct test_opts *opts) +static bool test_stream_transport_uaf(int cid) { int sockets[MAX_PORT_RETRIES]; struct sockaddr_vm addr; - int fd, i, alen; + socklen_t alen; + int fd, i, c; + bool ret; - fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM); + /* Probe for a transport by attempting a local CID bind. 
Unavailable + * transport (or more specifically: an unsupported transport/CID + * combination) results in EADDRNOTAVAIL, other errnos are fatal. + */ + fd = vsock_bind_try(cid, VMADDR_PORT_ANY, SOCK_STREAM); + if (fd < 0) { + if (errno != EADDRNOTAVAIL) { + perror("Unexpected bind() errno"); + exit(EXIT_FAILURE); + } + + return false; + } alen = sizeof(addr); if (getsockname(fd, (struct sockaddr *)&addr, &alen)) { @@ -1735,38 +1817,83 @@ static void test_stream_transport_uaf_client(const struct test_opts *opts) exit(EXIT_FAILURE); } + /* Drain the autobind pool; see __vsock_bind_connectible(). */ for (i = 0; i < MAX_PORT_RETRIES; ++i) - sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port, - SOCK_STREAM); + sockets[i] = vsock_bind(cid, ++addr.svm_port, SOCK_STREAM); close(fd); - fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + /* Setting SOCK_NONBLOCK makes connect() return soon after + * (re-)assigning the transport. We are not connecting to anything + * anyway, so there is no point entering the main loop in + * vsock_connect(); waiting for timeout, checking for signals, etc. + */ + fd = socket(AF_VSOCK, SOCK_STREAM | SOCK_NONBLOCK, 0); if (fd < 0) { perror("socket"); exit(EXIT_FAILURE); } - if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) { - perror("Unexpected connect() #1 success"); + /* Assign transport, while failing to autobind. Autobind pool was + * drained, so EADDRNOTAVAIL coming from __vsock_bind_connectible() is + * expected. + * + * One exception is ENODEV which is thrown by vsock_assign_transport(), + * i.e. before vsock_auto_bind(), when the only transport loaded is + * vhost. + */ + if (!connect(fd, (struct sockaddr *)&addr, alen)) { + fprintf(stderr, "Unexpected connect() success\n"); exit(EXIT_FAILURE); } - - /* Vulnerable system may crash now. 
*/ - if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) { - perror("Unexpected connect() #2 success"); + if (errno == ENODEV && cid == VMADDR_CID_HOST) { + ret = false; + goto cleanup; + } + if (errno != EADDRNOTAVAIL) { + perror("Unexpected connect() errno"); exit(EXIT_FAILURE); } + /* Reassign transport, triggering old transport release and + * (potentially) unbinding of an unbound socket. + * + * Vulnerable system may crash now. + */ + for (c = VMADDR_CID_HYPERVISOR; c <= VMADDR_CID_HOST + 1; ++c) { + if (c != cid) { + addr.svm_cid = c; + (void)connect(fd, (struct sockaddr *)&addr, alen); + } + } + + ret = true; +cleanup: close(fd); while (i--) close(sockets[i]); - control_writeln("DONE"); + return ret; } -static void test_stream_transport_uaf_server(const struct test_opts *opts) +/* Test attempts to trigger a transport release for an unbound socket. This can + * lead to a reference count mishandling. + */ +static void test_stream_transport_uaf_client(const struct test_opts *opts) { - control_expectln("DONE"); + bool tested = false; + int cid, tr; + + for (cid = VMADDR_CID_HYPERVISOR; cid <= VMADDR_CID_HOST + 1; ++cid) + tested |= test_stream_transport_uaf(cid); + + tr = get_transports(); + if (!tr) + fprintf(stderr, "No transports detected\n"); + else if (tr == TRANSPORT_VIRTIO) + fprintf(stderr, "Setup unsupported: sole virtio transport\n"); + else if (!tested) + fprintf(stderr, "No transports tested\n"); } static void test_stream_connect_retry_client(const struct test_opts *opts) @@ -1811,6 +1938,180 @@ static void test_stream_connect_retry_server(const struct test_opts *opts) close(fd); } +#define TRANSPORT_CHANGE_TIMEOUT 2 /* seconds */ + +static void *test_stream_transport_change_thread(void *vargp) +{ + pid_t *pid = (pid_t *)vargp; + int ret; + + ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + if (ret) { + fprintf(stderr, "pthread_setcanceltype: %d\n", ret); + exit(EXIT_FAILURE); + } + + while (true) { + if (kill(*pid, SIGUSR1) 
< 0) { + perror("kill"); + exit(EXIT_FAILURE); + } + } + return NULL; +} + +static void test_transport_change_signal_handler(int signal) +{ + /* We need a custom handler for SIGUSR1 as the default one terminates the process. */ +} + +static void test_stream_transport_change_client(const struct test_opts *opts) +{ + __sighandler_t old_handler; + pid_t pid = getpid(); + pthread_t thread_id; + time_t tout; + int ret, tr; + + tr = get_transports(); + + /* Print a warning if there is a G2H transport loaded. + * This is on a best effort basis because VMCI can be either G2H and H2G, and there is + * no easy way to understand it. + * The bug we are testing only appears when G2H transports are not loaded. + * This is because `vsock_assign_transport`, when using CID 0, assigns a G2H transport + * to vsk->transport. If none is available it is set to NULL, causing the null-ptr-deref. + */ + if (tr & TRANSPORTS_G2H) + fprintf(stderr, "G2H Transport detected. This test will not fail.\n"); + + old_handler = signal(SIGUSR1, test_transport_change_signal_handler); + if (old_handler == SIG_ERR) { + perror("signal"); + exit(EXIT_FAILURE); + } + + ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid); + if (ret) { + fprintf(stderr, "pthread_create: %d\n", ret); + exit(EXIT_FAILURE); + } + + control_expectln("LISTENING"); + + tout = current_nsec() + TRANSPORT_CHANGE_TIMEOUT * NSEC_PER_SEC; + do { + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = opts->peer_cid, + .svm_port = opts->peer_port, + }; + bool send_control = false; + int s; + + s = socket(AF_VSOCK, SOCK_STREAM, 0); + if (s < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); + /* The connect can fail due to signals coming from the thread, + * or because the receiver connection queue is full. 
+ * Ignoring also the latter case because there is no way + * of synchronizing client's connect and server's accept when + * connect(s) are constantly being interrupted by signals. + */ + if (ret == -1 && (errno != EINTR && errno != ECONNRESET)) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Notify the server if the connect() is successful or the + * receiver connection queue is full, so it will do accept() + * to drain it. + */ + if (!ret || errno == ECONNRESET) + send_control = true; + + /* Set CID to 0 cause a transport change. */ + sa.svm_cid = 0; + + /* There is a case where this will not fail: + * if the previous connect() is interrupted while the + * connection request is already sent, this second + * connect() will wait for the response. + */ + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); + if (!ret || errno == ECONNRESET) + send_control = true; + + close(s); + + if (send_control) + control_writeulong(CONTROL_CONTINUE); + + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); + + ret = pthread_cancel(thread_id); + if (ret) { + fprintf(stderr, "pthread_cancel: %d\n", ret); + exit(EXIT_FAILURE); + } + + ret = pthread_join(thread_id, NULL); + if (ret) { + fprintf(stderr, "pthread_join: %d\n", ret); + exit(EXIT_FAILURE); + } + + if (signal(SIGUSR1, old_handler) == SIG_ERR) { + perror("signal"); + exit(EXIT_FAILURE); + } +} + +static void test_stream_transport_change_server(const struct test_opts *opts) +{ + int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + + /* Set the socket to be nonblocking because connects that have been interrupted + * (EINTR) can fill the receiver's accept queue anyway, leading to connect failure. + * As of today (6.15) in such situation there is no way to understand, from the + * client side, if the connection has been queued in the server or not. 
+ */ + if (fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK) < 0) { + perror("fcntl"); + exit(EXIT_FAILURE); + } + control_writeln("LISTENING"); + + while (control_readulong() == CONTROL_CONTINUE) { + /* Must accept the connection, otherwise the `listen` + * queue will fill up and new connections will fail. + * There can be more than one queued connection, + * clear them all. + */ + while (true) { + int client = accept(s, NULL, NULL); + + if (client < 0) { + if (errno == EAGAIN) + break; + + perror("accept"); + exit(EXIT_FAILURE); + } + + close(client); + } + } + + close(s); +} + static void test_stream_linger_client(const struct test_opts *opts) { int fd; @@ -2034,7 +2335,6 @@ static struct test_case test_cases[] = { { .name = "SOCK_STREAM transport release use-after-free", .run_client = test_stream_transport_uaf_client, - .run_server = test_stream_transport_uaf_server, }, { .name = "SOCK_STREAM retry failed connect()", @@ -2051,6 +2351,21 @@ static struct test_case test_cases[] = { .run_client = test_stream_nolinger_client, .run_server = test_stream_nolinger_server, }, + { + .name = "SOCK_STREAM transport change null-ptr-deref", + .run_client = test_stream_transport_change_client, + .run_server = test_stream_transport_change_server, + }, + { + .name = "SOCK_STREAM ioctl(SIOCINQ) functionality", + .run_client = test_stream_unread_bytes_client, + .run_server = test_stream_unread_bytes_server, + }, + { + .name = "SOCK_SEQPACKET ioctl(SIOCINQ) functionality", + .run_client = test_seqpacket_unread_bytes_client, + .run_server = test_seqpacket_unread_bytes_server, + }, {}, }; |