diff options
Diffstat (limited to 'tools/testing')
291 files changed, 12747 insertions, 3101 deletions
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index d07f14cb7aa4..0d5ce4b74b9f 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -5,22 +5,19 @@ ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=nvdimm_bus_register ldflags-y += --wrap=devm_cxl_port_enumerate_dports -ldflags-y += --wrap=devm_cxl_setup_hdm -ldflags-y += --wrap=devm_cxl_add_passthrough_decoder -ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready -ldflags-y += --wrap=cxl_hdm_decode_init -ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat ldflags-y += --wrap=cxl_dport_init_ras_reporting +ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl CXL_CORE_SRC := $(DRIVERS)/cxl/core ccflags-y := -I$(srctree)/drivers/cxl/ ccflags-y += -D__mock=__weak +ccflags-y += -DCXL_TEST_ENABLE=1 ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/ obj-m += cxl_acpi.o diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c index f088792a8925..6754de35598d 100644 --- a/tools/testing/cxl/cxl_core_exports.c +++ b/tools/testing/cxl/cxl_core_exports.c @@ -2,6 +2,28 @@ /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ #include "cxl.h" +#include "exports.h" /* Exporting of cxl_core symbols that are only used by cxl_test */ EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL"); + +cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev = + __devm_cxl_add_dport_by_dev; +EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL"); + +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + return _devm_cxl_add_dport_by_dev(port, dport_dev); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL"); + +cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup = + __devm_cxl_switch_port_decoders_setup; +EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL"); + +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + return _devm_cxl_switch_port_decoders_setup(port); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL"); diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h new file mode 100644 index 000000000000..7ebee7c0bd67 --- /dev/null +++ b/tools/testing/cxl/exports.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2025 Intel Corporation */ +#ifndef __MOCK_CXL_EXPORTS_H_ +#define __MOCK_CXL_EXPORTS_H_ + +typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port, + struct device *dport_dev); +extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev; + +typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port); +extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup; + +#endif diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 6a25cca5636f..2d135ca533d0 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -210,7 +210,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -225,7 +225,7 @@ static struct { }, .interleave_ways = 1, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -240,7 +240,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -255,7 +255,7 @@ static struct { }, .interleave_ways = 1, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -270,7 +270,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -285,7 +285,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M, @@ -302,7 +302,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -318,7 +318,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 1, .granularity = 0, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -334,7 +334,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 8, .granularity = 1, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_512M * 6UL, @@ -643,15 +643,8 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port, return cxlhdm; } -static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) -{ - dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n"); - return -EOPNOTSUPP; -} - - struct target_map_ctx { - int *target_map; + u32 *target_map; int index; int target_count; }; @@ -818,15 +811,21 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) */ if (WARN_ON(!dev)) continue; + cxlsd = to_cxl_switch_decoder(dev); if (i == 0) { /* put cxl_mem.4 second in the decode order */ - if (pdev->id == 4) + if (pdev->id == 4) { cxlsd->target[1] = dport; - else + cxld->target_map[1] = dport->port_id; + } else { cxlsd->target[0] = dport; - } else + cxld->target_map[0] = dport->port_id; + } + } else { cxlsd->target[0] = dport; + cxld->target_map[0] = dport->port_id; + } cxld = &cxlsd->cxld; cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->flags = CXL_DECODER_F_ENABLE; @@ -863,9 +862,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, target_count = NR_CXL_SWITCH_PORTS; for (i = 0; i < NR_CXL_PORT_DECODERS; i++) { - int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; struct target_map_ctx ctx = { - .target_map = target_map, .target_count = target_count, }; struct cxl_decoder *cxld; @@ -894,6 +891,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxld = &cxled->cxld; } + ctx.target_map = cxld->target_map; + mock_init_hdm_decoder(cxld); if (target_count) { @@ -905,7 +904,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, } } - rc = cxl_decoder_add_locked(cxld, target_map); + rc = cxl_decoder_add_locked(cxld); if (rc) { put_device(&cxld->dev); dev_err(&port->dev, "Failed to add decoder\n"); @@ -921,10 +920,42 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return 0; } -static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +static int __mock_cxl_decoders_setup(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm; + + cxlhdm = mock_cxl_setup_hdm(port, NULL); + if (IS_ERR(cxlhdm)) { + if (PTR_ERR(cxlhdm) != -ENODEV) + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); + return PTR_ERR(cxlhdm); + } + + return mock_cxl_enumerate_decoders(cxlhdm, NULL); +} + +static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + return __mock_cxl_decoders_setup(port); +} + +static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port) +{ + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + return __mock_cxl_decoders_setup(port); +} + +static int get_port_array(struct cxl_port *port, + struct platform_device ***port_array, + int *port_array_size) { struct platform_device **array; - int i, array_size; + int array_size; if (port->depth == 1) { if (is_multi_bridge(port->uport_dev)) { @@ -958,6 +989,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return -ENXIO; } + *port_array = array; + *port_array_size = array_size; + + return 0; +} + +static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +{ + struct platform_device **array; + int i, array_size; + int rc; + + rc = get_port_array(port, &array, &array_size); + if (rc) + return rc; + for (i = 0; i < array_size; i++) { struct platform_device *pdev = array[i]; struct cxl_dport *dport; @@ -979,6 +1026,36 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return 0; } +static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + struct platform_device **array; + int rc, i, array_size; + + rc = get_port_array(port, &array, &array_size); + if (rc) + return ERR_PTR(rc); + + for (i = 0; i < array_size; i++) { + struct platform_device *pdev = array[i]; + + if (pdev->dev.parent != port->uport_dev) { + dev_dbg(&port->dev, "%s: mismatch parent %s\n", + dev_name(port->uport_dev), + dev_name(pdev->dev.parent)); + continue; + } + + if (&pdev->dev != dport_dev) + continue; + + return devm_cxl_add_dport(port, &pdev->dev, pdev->id, + CXL_RESOURCE_NONE); + } + + return ERR_PTR(-ENODEV); +} + /* * Faking the cxl_dpa_perf for the memdev when appropriate. */ @@ -1035,11 +1112,11 @@ static struct cxl_mock_ops cxl_mock_ops = { .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, + .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup, + .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, - .devm_cxl_setup_hdm = mock_cxl_setup_hdm, - .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, - .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat, + .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 1989ae020df3..995269a75cbd 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -10,12 +10,21 @@ #include <cxlmem.h> #include <cxlpci.h> #include "mock.h" +#include "../exports.h" static LIST_HEAD(mock); +static struct cxl_dport * +redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); +static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port); + void register_cxl_mock_ops(struct cxl_mock_ops *ops) { list_add_rcu(&ops->list, &mock); + _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev; + _devm_cxl_switch_port_decoders_setup = + redirect_devm_cxl_switch_port_decoders_setup; } EXPORT_SYMBOL_GPL(register_cxl_mock_ops); @@ -23,6 +32,9 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu); void unregister_cxl_mock_ops(struct cxl_mock_ops *ops) { + _devm_cxl_switch_port_decoders_setup = + __devm_cxl_switch_port_decoders_setup; + _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev; list_del_rcu(&ops->list); synchronize_srcu(&cxl_mock_srcu); } @@ -131,55 +143,34 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); -struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) - -{ - int index; - struct cxl_hdm *cxlhdm; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(port->uport_dev)) - cxlhdm = ops->devm_cxl_setup_hdm(port, info); - else - cxlhdm = devm_cxl_setup_hdm(port, info); - put_cxl_mock_ops(index); - - return cxlhdm; -} -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL"); - -int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) +int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port) { int rc, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_add_passthrough_decoder(port); + rc = ops->devm_cxl_switch_port_decoders_setup(port); else - rc = devm_cxl_add_passthrough_decoder(port); + rc = __devm_cxl_switch_port_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL"); -int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port) { int rc, index; - struct cxl_port *port = cxlhdm->port; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info); + rc = ops->devm_cxl_endpoint_decoders_setup(port); else - rc = devm_cxl_enumerate_decoders(cxlhdm, info); + rc = devm_cxl_endpoint_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL"); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL"); int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) { @@ -211,39 +202,6 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL"); -int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) -{ - int rc = 0, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_dev(cxlds->dev)) - rc = 0; - else - rc = cxl_hdm_decode_init(cxlds, cxlhdm, info); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); - -int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) -{ - int rc = 0, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_dev(cxlds->dev)) - rc = 0; - else - rc = cxl_dvsec_rr_decode(cxlds, info); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL"); - struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, @@ -311,6 +269,22 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL"); +struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + struct cxl_dport *dport; + + if (ops && ops->is_mock_port(port->uport_dev)) + dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev); + else + dport = __devm_cxl_add_dport_by_dev(port, dport_dev); + put_cxl_mock_ops(index); + + return dport; +} + MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("cxl_test: emulation module"); MODULE_IMPORT_NS("ACPI"); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index d1b0271d2822..4ed932e76aae 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -20,12 +20,11 @@ struct cxl_mock_ops { bool (*is_mock_port)(struct device *dev); bool (*is_mock_dev)(struct device *dev); int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port); - struct cxl_hdm *(*devm_cxl_setup_hdm)( - struct cxl_port *port, struct cxl_endpoint_dvsec_info *info); - int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); - int (*devm_cxl_enumerate_decoders)( - struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info); + int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port); + int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port); void (*cxl_endpoint_parse_cdat)(struct cxl_port *port); + struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port, + struct device *dport_dev); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops); diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config index 54ad8972681a..28edf816aa70 100644 --- a/tools/testing/kunit/configs/arch_uml.config +++ b/tools/testing/kunit/configs/arch_uml.config @@ -1,8 +1,7 @@ # Config options which are added to UML builds by default -# Enable virtio/pci, as a lot of tests require it. -CONFIG_VIRTIO_UML=y -CONFIG_UML_PCI_OVER_VIRTIO=y +# Enable pci, as a lot of tests require it. +CONFIG_KUNIT_UML_PCI=y # Enable FORTIFY_SOURCE for wider checking. CONFIG_FORTIFY_SOURCE=y diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 7f9ae55fd6d5..cd99c1956331 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -228,7 +228,7 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input fake_test.counts.passed = 1 output: Iterable[str] = input_data - if request.raw_output == 'all': + if request.raw_output == 'all' or request.raw_output == 'full': pass elif request.raw_output == 'kunit': output = kunit_parser.extract_tap_lines(output) @@ -425,7 +425,7 @@ def add_parse_opts(parser: argparse.ArgumentParser) -> None: parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. ' 'By default, filters to just KUnit output. Use ' '--raw_output=all to show everything', - type=str, nargs='?', const='all', default=None, choices=['all', 'kunit']) + type=str, nargs='?', const='all', default=None, choices=['all', 'full', 'kunit']) parser.add_argument('--json', nargs='?', help='Prints parsed test results as JSON to stdout or a file if ' diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index c176487356e6..333cd3a4a56b 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -352,9 +352,9 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: lines.pop() return True -TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') +TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?([^#]*)( # .*)?$') -TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') +TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?(.*) # SKIP ?(.*)$') def peek_test_name_match(lines: LineStream, test: Test) -> bool: """ @@ -379,6 +379,8 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool: if not match: return False name = match.group(4) + if not name: + return False return name == test.name def parse_test_result(lines: LineStream, test: Test, @@ -416,7 +418,7 @@ def parse_test_result(lines: LineStream, test: Test, # Set name of test object if skip_match: - test.name = skip_match.group(4) + test.name = skip_match.group(4) or skip_match.group(5) else: test.name = match.group(4) diff --git a/tools/testing/kunit/qemu_configs/mips.py b/tools/testing/kunit/qemu_configs/mips.py new file mode 100644 index 000000000000..8899ac157b30 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/mips.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='mips', + kconfig=''' +CONFIG_32BIT=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_MIPS_MALTA=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +''', + qemu_arch='mips', + kernel_path='vmlinuz', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'malta']) diff --git a/tools/testing/kunit/qemu_configs/mips64.py b/tools/testing/kunit/qemu_configs/mips64.py new file mode 100644 index 000000000000..1478aed05b94 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/mips64.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='mips', + kconfig=''' +CONFIG_CPU_MIPS64_R2=y +CONFIG_64BIT=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_MIPS_MALTA=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +''', + qemu_arch='mips64', + kernel_path='vmlinuz', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'malta', '-cpu', '5KEc']) diff --git a/tools/testing/kunit/qemu_configs/mips64el.py b/tools/testing/kunit/qemu_configs/mips64el.py new file mode 100644 index 000000000000..300c711d7a82 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/mips64el.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='mips', + kconfig=''' +CONFIG_CPU_MIPS64_R2=y +CONFIG_64BIT=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_MALTA=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +''', + qemu_arch='mips64el', + kernel_path='vmlinuz', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'malta', '-cpu', '5KEc']) diff --git a/tools/testing/kunit/qemu_configs/mipsel.py b/tools/testing/kunit/qemu_configs/mipsel.py new file mode 100644 index 000000000000..3d3543315b45 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/mipsel.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='mips', + kconfig=''' +CONFIG_32BIT=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_MALTA=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +''', + qemu_arch='mipsel', + kernel_path='vmlinuz', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'malta']) diff --git a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log index 65d3f27feaf2..30d9ef18bcec 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log @@ -1,5 +1,5 @@ TAP version 13 -1..2 +1..3 # selftests: membarrier: membarrier_test_single_thread # TAP version 13 # 1..2 @@ -12,3 +12,4 @@ ok 1 selftests: membarrier: membarrier_test_single_thread # ok 1 sys_membarrier available # ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected ok 2 selftests: membarrier: membarrier_test_multi_thread +ok 3 # SKIP selftests: membarrier: membarrier_test_multi_thread diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 84b8c3c92c79..2f830ff8396c 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -499,19 +499,17 @@ void ida_check_random(void) goto repeat; } -void ida_simple_get_remove_test(void) +void ida_alloc_free_test(void) { DEFINE_IDA(ida); unsigned long i; - for (i = 0; i < 10000; i++) { - assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i); - } - assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0); + for (i = 0; i < 10000; i++) + assert(ida_alloc_max(&ida, 20000, GFP_KERNEL) == i); + assert(ida_alloc_range(&ida, 5, 30, GFP_KERNEL) < 0); - for (i = 0; i < 10000; i++) { - ida_simple_remove(&ida, i); - } + for (i = 0; i < 10000; i++) + ida_free(&ida, i); assert(ida_is_empty(&ida)); ida_destroy(&ida); @@ -524,7 +522,7 @@ void user_ida_checks(void) ida_check_nomem(); ida_check_conv_user(); ida_check_random(); - ida_simple_get_remove_test(); + ida_alloc_free_test(); radix_tree_cpu_dead(1); } diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 172700fb7784..5c1b18e3ed21 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -8,14 +8,6 @@ * difficult to handle in kernel tests. */ -#define CONFIG_DEBUG_MAPLE_TREE -#define CONFIG_MAPLE_SEARCH -#define MAPLE_32BIT (MAPLE_NODE_SLOTS > 31) -#include "test.h" -#include <stdlib.h> -#include <time.h> -#include <linux/init.h> - #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) @@ -23,7 +15,9 @@ #define MODULE_LICENSE(x) #define dump_stack() assert(0) -#include "../../../lib/maple_tree.c" +#include "test.h" + +#include "../shared/maple-shim.c" #include "../../../lib/test_maple_tree.c" #define RCU_RANGE_COUNT 1000 @@ -63,430 +57,6 @@ struct rcu_reader_struct { struct rcu_test_struct2 *test; }; -static int get_alloc_node_count(struct ma_state *mas) -{ - int count = 1; - struct maple_alloc *node = mas->alloc; - - if (!node || ((unsigned long)node & 0x1)) - return 0; - while (node->node_count) { - count += node->node_count; - node = node->slot[0]; - } - return count; -} - -static void check_mas_alloc_node_count(struct ma_state *mas) -{ - mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 1, GFP_KERNEL); - mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 3, GFP_KERNEL); - MT_BUG_ON(mas->tree, get_alloc_node_count(mas) != mas->alloc->total); - mas_destroy(mas); -} - -/* - * check_new_node() - Check the creation of new nodes and error path - * verification. - */ -static noinline void __init check_new_node(struct maple_tree *mt) -{ - - struct maple_node *mn, *mn2, *mn3; - struct maple_alloc *smn; - struct maple_node *nodes[100]; - int i, j, total; - - MA_STATE(mas, mt, 0, 0); - - check_mas_alloc_node_count(&mas); - - /* Try allocating 3 nodes */ - mtree_lock(mt); - mt_set_non_kernel(0); - /* request 3 nodes to be allocated. */ - mas_node_count(&mas, 3); - /* Allocation request of 3. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 3); - /* Allocate failed. */ - MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - - MT_BUG_ON(mt, mas_allocated(&mas) != 3); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, mas.alloc == NULL); - MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); - mas_push_node(&mas, mn); - mas_reset(&mas); - mas_destroy(&mas); - mtree_unlock(mt); - - - /* Try allocating 1 node, then 2 more */ - mtree_lock(mt); - /* Set allocation request to 1. */ - mas_set_alloc_req(&mas, 1); - /* Check Allocation request of 1. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 1); - mas_set_err(&mas, -ENOMEM); - /* Validate allocation request. */ - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - /* Eat the requested node. */ - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, mn->slot[0] != NULL); - MT_BUG_ON(mt, mn->slot[1] != NULL); - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mas.status = ma_start; - mas_destroy(&mas); - /* Allocate 3 nodes, will fail. */ - mas_node_count(&mas, 3); - /* Drop the lock and allocate 3 nodes. */ - mas_nomem(&mas, GFP_KERNEL); - /* Ensure 3 are allocated. */ - MT_BUG_ON(mt, mas_allocated(&mas) != 3); - /* Allocation request of 0. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 0); - - MT_BUG_ON(mt, mas.alloc == NULL); - MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); - MT_BUG_ON(mt, mas.alloc->slot[1] == NULL); - /* Ensure we counted 3. */ - MT_BUG_ON(mt, mas_allocated(&mas) != 3); - /* Free. */ - mas_reset(&mas); - mas_destroy(&mas); - - /* Set allocation request to 1. */ - mas_set_alloc_req(&mas, 1); - MT_BUG_ON(mt, mas_alloc_req(&mas) != 1); - mas_set_err(&mas, -ENOMEM); - /* Validate allocation request. */ - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - MT_BUG_ON(mt, mas_allocated(&mas) != 1); - /* Check the node is only one node. */ - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, mn->slot[0] != NULL); - MT_BUG_ON(mt, mn->slot[1] != NULL); - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas_allocated(&mas) != 1); - MT_BUG_ON(mt, mas.alloc->node_count); - - mas_set_alloc_req(&mas, 2); /* request 2 more. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 2); - mas_set_err(&mas, -ENOMEM); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - MT_BUG_ON(mt, mas_allocated(&mas) != 3); - MT_BUG_ON(mt, mas.alloc == NULL); - MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); - MT_BUG_ON(mt, mas.alloc->slot[1] == NULL); - for (i = 2; i >= 0; i--) { - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas) != i); - MT_BUG_ON(mt, !mn); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - - total = 64; - mas_set_alloc_req(&mas, total); /* request 2 more. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != total); - mas_set_err(&mas, -ENOMEM); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - for (i = total; i > 0; i--) { - unsigned int e = 0; /* expected node_count */ - - if (!MAPLE_32BIT) { - if (i >= 35) - e = i - 34; - else if (i >= 5) - e = i - 4; - else if (i >= 2) - e = i - 1; - } else { - if (i >= 4) - e = i - 3; - else if (i >= 1) - e = i - 1; - else - e = 0; - } - - MT_BUG_ON(mt, mas.alloc->node_count != e); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != i - 1); - MT_BUG_ON(mt, !mn); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - - total = 100; - for (i = 1; i < total; i++) { - mas_set_alloc_req(&mas, i); - mas_set_err(&mas, -ENOMEM); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - for (j = i; j > 0; j--) { - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas) != j - 1); - MT_BUG_ON(mt, !mn); - MT_BUG_ON(mt, not_empty(mn)); - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas_allocated(&mas) != j); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != j - 1); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - - mas_set_alloc_req(&mas, i); - mas_set_err(&mas, -ENOMEM); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - for (j = 0; j <= i/2; j++) { - MT_BUG_ON(mt, mas_allocated(&mas) != i - j); - nodes[j] = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1); - } - - while (j) { - j--; - mas_push_node(&mas, nodes[j]); - MT_BUG_ON(mt, mas_allocated(&mas) != i - j); - } - MT_BUG_ON(mt, mas_allocated(&mas) != i); - for (j = 0; j <= i/2; j++) { - MT_BUG_ON(mt, mas_allocated(&mas) != i - j); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1); - } - mas_reset(&mas); - MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL)); - mas_destroy(&mas); - - } - - /* Set allocation request. */ - total = 500; - mas_node_count(&mas, total); - /* Drop the lock and allocate the nodes. */ - mas_nomem(&mas, GFP_KERNEL); - MT_BUG_ON(mt, !mas.alloc); - i = 1; - smn = mas.alloc; - while (i < total) { - for (j = 0; j < MAPLE_ALLOC_SLOTS; j++) { - i++; - MT_BUG_ON(mt, !smn->slot[j]); - if (i == total) - break; - } - smn = smn->slot[0]; /* next. */ - } - MT_BUG_ON(mt, mas_allocated(&mas) != total); - mas_reset(&mas); - mas_destroy(&mas); /* Free. */ - - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - for (i = 1; i < 128; i++) { - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */ - for (j = i; j > 0; j--) { /*Free the requests */ - mn = mas_pop_node(&mas); /* get the next node. */ - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - } - - for (i = 1; i < MAPLE_NODE_MASK + 1; i++) { - MA_STATE(mas2, mt, 0, 0); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */ - for (j = 1; j <= i; j++) { /* Move the allocations to mas2 */ - mn = mas_pop_node(&mas); /* get the next node. */ - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, not_empty(mn)); - mas_push_node(&mas2, mn); - MT_BUG_ON(mt, mas_allocated(&mas2) != j); - } - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_allocated(&mas2) != i); - - for (j = i; j > 0; j--) { /*Free the requests */ - MT_BUG_ON(mt, mas_allocated(&mas2) != j); - mn = mas_pop_node(&mas2); /* get the next node. */ - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - MT_BUG_ON(mt, mas_allocated(&mas2) != 0); - } - - - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */ - MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); - - mn = mas_pop_node(&mas); /* get the next node. */ - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); - - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); - - /* Check the limit of pop/push/pop */ - mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 1); - MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - MT_BUG_ON(mt, mas_alloc_req(&mas)); - MT_BUG_ON(mt, mas.alloc->node_count != 1); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas.alloc->node_count != 1); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) { - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - - - for (i = 3; i < MAPLE_NODE_MASK * 3; i++) { - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mas_push_node(&mas, mn); /* put it back */ - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mn2 = mas_pop_node(&mas); /* get the next node. */ - mas_push_node(&mas, mn); /* put them back */ - mas_push_node(&mas, mn2); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mn2 = mas_pop_node(&mas); /* get the next node. */ - mn3 = mas_pop_node(&mas); /* get the next node. */ - mas_push_node(&mas, mn); /* put them back */ - mas_push_node(&mas, mn2); - mas_push_node(&mas, mn3); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mn = mas_pop_node(&mas); /* get the next node. */ - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mn = mas_pop_node(&mas); /* get the next node. */ - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mas_destroy(&mas); - } - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, 5); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != 5); - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, 10); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.status = ma_start; - MT_BUG_ON(mt, mas_allocated(&mas) != 10); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS - 1); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS - 1); - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.status = ma_start; - MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 2); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.status = ma_start; - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 2); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 1); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 1); - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 3 + 2); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.status = ma_start; - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 3 + 2); - mas_destroy(&mas); - - mtree_unlock(mt); -} - /* * Check erasing including RCU. */ @@ -35455,17 +35025,6 @@ static void check_dfs_preorder(struct maple_tree *mt) MT_BUG_ON(mt, count != e); mtree_destroy(mt); - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); - mas_reset(&mas); - mt_zero_nr_tallocated(); - mt_set_non_kernel(200); - mas_expected_entries(&mas, max); - for (count = 0; count <= max; count++) { - mas.index = mas.last = count; - mas_store(&mas, xa_mk_value(count)); - MT_BUG_ON(mt, mas_is_err(&mas)); - } - mas_destroy(&mas); rcu_barrier(); /* * pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n", @@ -35524,6 +35083,18 @@ static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry) return vacant_height; } +static int mas_allocated(struct ma_state *mas) +{ + int total = 0; + + if (mas->alloc) + total++; + + if (mas->sheaf) + total += kmem_cache_sheaf_size(mas->sheaf); + + return total; +} /* Preallocation testing */ static noinline void __init check_prealloc(struct maple_tree *mt) { @@ -35542,7 +35113,10 @@ static noinline void __init check_prealloc(struct maple_tree *mt) /* Spanning store */ mas_set_range(&mas, 470, 500); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + + mas_wr_preallocate(&wr_mas, ptr); + MT_BUG_ON(mt, mas.store_type != wr_spanning_store); + MT_BUG_ON(mt, mas_is_err(&mas)); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); vacant_height = get_vacant_height(&wr_mas, ptr); @@ -35552,6 +35126,7 @@ static noinline void __init check_prealloc(struct maple_tree *mt) allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); + mas_wr_preallocate(&wr_mas, ptr); MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); @@ -35597,20 +35172,6 @@ static noinline void __init check_prealloc(struct maple_tree *mt) height = mas_mt_height(&mas); vacant_height = get_vacant_height(&wr_mas, ptr); MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas_allocated(&mas) != allocated); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); - mas_destroy(&mas); - allocated = mas_allocated(&mas); - MT_BUG_ON(mt, allocated != 0); - - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); - allocated = mas_allocated(&mas); - height = mas_mt_height(&mas); - vacant_height = get_vacant_height(&wr_mas, ptr); - MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -36327,13 +35888,18 @@ extern void test_kmem_cache_bulk(void); static inline void check_spanning_store_height(struct maple_tree *mt) { int index = 0; + int last = 140; MA_STATE(mas, mt, 0, 0); mas_lock(&mas); while (mt_height(mt) != 3) { mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL); mas_set(&mas, ++index); } - mas_set_range(&mas, 90, 140); + + if (MAPLE_32BIT) + last = 155; /* 32 bit higher branching factor. */ + + mas_set_range(&mas, 90, last); mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL); MT_BUG_ON(mt, mas_mt_height(&mas) != 2); mas_unlock(&mas); @@ -36406,11 +35972,17 @@ static void check_nomem_writer_race(struct maple_tree *mt) check_load(mt, 6, xa_mk_value(0xC)); mtree_unlock(mt); + mt_set_non_kernel(0); /* test for the same race but with mas_store_gfp() */ mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL); mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL); mas_set_range(&mas, 0, 5); + + /* setup writer 2 that will trigger the race condition */ + mt_set_private(mt); + mt_set_callback(writer2); + mtree_lock(mt); mas_store_gfp(&mas, NULL, GFP_KERNEL); @@ -36428,6 +36000,7 @@ static void check_nomem_writer_race(struct maple_tree *mt) */ static inline int check_vma_modification(struct maple_tree *mt) { +#if defined(CONFIG_64BIT) MA_STATE(mas, mt, 0, 0); mtree_lock(mt); @@ -36451,30 +36024,11 @@ static inline int check_vma_modification(struct maple_tree *mt) mas_destroy(&mas); mtree_unlock(mt); +#endif + return 0; } -/* - * test to check that bulk stores do not use wr_rebalance as the store - * type. - */ -static inline void check_bulk_rebalance(struct maple_tree *mt) -{ - MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX); - int max = 10; - - build_full_tree(mt, 0, 2); - - /* erase every entry in the tree */ - do { - /* set up bulk store mode */ - mas_expected_entries(&mas, max); - mas_erase(&mas); - MT_BUG_ON(mt, mas.store_type == wr_rebalance); - } while (mas_prev(&mas, 0) != NULL); - - mas_destroy(&mas); -} void farmer_tests(void) { @@ -36487,10 +36041,6 @@ void farmer_tests(void) check_vma_modification(&tree); mtree_destroy(&tree); - mt_init(&tree); - check_bulk_rebalance(&tree); - mtree_destroy(&tree); - tree.ma_root = xa_mk_value(0); mt_dump(&tree, mt_dump_dec); @@ -36550,10 +36100,6 @@ void farmer_tests(void) check_erase_testset(&tree); mtree_destroy(&tree); - mt_init_flags(&tree, 0); - check_new_node(&tree); - mtree_destroy(&tree); - if (!MAPLE_32BIT) { mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_rcu_simulated(&tree); diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 5bd9e6e80625..121ae78d6e88 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -51,7 +51,6 @@ static inline unsigned long page_to_phys(struct page *page) #define page_to_pfn(page) ((unsigned long)(page) / PAGE_SIZE) #define pfn_to_page(pfn) (void *)((pfn) * PAGE_SIZE) -#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #define __min(t1, t2, min1, min2, x, y) ({ \ t1 min1 = (x); \ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 030da61dbff3..c46ebdb9b8ef 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -36,6 +36,7 @@ TARGETS += filesystems/fat TARGETS += filesystems/overlayfs TARGETS += filesystems/statmount TARGETS += filesystems/mount-notify +TARGETS += filesystems/fuse TARGETS += firmware TARGETS += fpu TARGETS += ftrace @@ -124,6 +125,7 @@ TARGETS += uevent TARGETS += user_events TARGETS += vDSO TARGETS += mm +TARGETS += vfio TARGETS += x86 TARGETS += x86/bugs TARGETS += zram @@ -314,7 +316,7 @@ gen_tar: install @echo "Created ${TAR_PATH}" clean: - @for TARGET in $(TARGETS); do \ + @for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index 2a4b2662035e..e113dafa5c24 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -53,10 +53,10 @@ struct ctl_data { struct ctl_data *next; }; -int num_cards = 0; -int num_controls = 0; -struct card_data *card_list = NULL; -struct ctl_data *ctl_list = NULL; +int num_cards; +int num_controls; +struct card_data *card_list; +struct ctl_data *ctl_list; static void find_controls(void) { diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index dbd7c222ce93..ce92548670c8 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -30,7 +30,7 @@ struct card_data { struct card_data *next; }; -struct card_data *card_list = NULL; +struct card_data *card_list; struct pcm_data { snd_pcm_t *handle; @@ -43,10 +43,10 @@ struct pcm_data { struct pcm_data *next; }; -struct pcm_data *pcm_list = NULL; +struct pcm_data *pcm_list; -int num_missing = 0; -struct pcm_data *pcm_missing = NULL; +int num_missing; +struct pcm_data *pcm_missing; snd_config_t *default_pcm_config; diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index d89eda3fd8a3..2cd9165c7348 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -219,7 +219,7 @@ extern void bpf_put_file(struct file *file) __ksym; * including the NULL termination character, stored in the supplied * buffer. On error, a negative integer is returned. */ -extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym; +extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym; /* This macro must be used to mark the exception callback corresponding to the * main program. For example: diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index ffbd4b116d17..23b2aa2604de 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -64,7 +64,8 @@ int dump_udp4(struct bpf_iter__udp *ctx) 0, 0L, 0, ctx->uid, 0, sock_i_ino(&inet->sk), inet->sk.sk_refcnt.refs.counter, udp_sk, - inet->sk.sk_drops.counter); + udp_sk->drop_counters.drops0.counter + + udp_sk->drop_counters.drops1.counter); return 0; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index 47ff7754f4fd..c48b05aa2a4b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx) 0, 0L, 0, ctx->uid, 0, sock_i_ino(&inet->sk), inet->sk.sk_refcnt.refs.counter, udp_sk, - inet->sk.sk_drops.counter); - + udp_sk->drop_counters.drops0.counter + + udp_sk->drop_counters.drops1.counter); return 0; } diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c index 70302477e326..41389e579578 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c @@ -117,7 +117,7 @@ int _getsockopt_subflow(struct bpf_sockopt *ctx) return 1; msk = bpf_core_cast(sk, struct mptcp_sock); - if (msk->pm.subflows != 1) { + if (msk->pm.extra_subflows != 1) { ctx->retval = -1; return 1; } diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 9a3499827d4b..2180c328a825 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -5,6 +5,7 @@ TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _damon_sysfs.py TEST_FILES += drgn_dump_damon_status.py +TEST_FILES += _common.sh # functionality tests TEST_PROGS += sysfs.sh @@ -18,6 +19,7 @@ TEST_PROGS += reclaim.sh lru_sort.sh TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py TEST_PROGS += sysfs_memcg_path_leak.sh +TEST_PROGS += sysfs_no_op_commit_break.py EXTRA_CLEAN = __pycache__ diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c index a9f4e9aaf3a9..93f3a71bcfd4 100644 --- a/tools/testing/selftests/damon/access_memory_even.c +++ b/tools/testing/selftests/damon/access_memory_even.c @@ -9,7 +9,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <time.h> int main(int argc, char *argv[]) { diff --git a/tools/testing/selftests/damon/sysfs_no_op_commit_break.py b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py new file mode 100755 index 000000000000..2c65cffe6b54 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import json +import os +import subprocess +import sys + +import _damon_sysfs + +def dump_damon_status_dict(pid): + try: + subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL) + except: + return None, 'drgn not found' + file_dir = os.path.dirname(os.path.abspath(__file__)) + dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py') + rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'], + stderr=subprocess.DEVNULL) + + if rc != 0: + return None, f'drgn fail: return code({rc})' + try: + with open('damon_dump_output', 'r') as f: + return json.load(f), None + except Exception as e: + return None, 'json.load fail (%s)' % e + +def main(): + kdamonds = _damon_sysfs.Kdamonds( + [_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + schemes=[_damon_sysfs.Damos( + ops_filters=[ + _damon_sysfs.DamosFilter( + type_='anon', + matching=True, + allow=True, + ) + ] + )], + )])] + ) + + err = kdamonds.start() + if err is not None: + print('kdamond start failed: %s' % err) + exit(1) + + before_commit_status, err = \ + dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print('before-commit status dump failed: %s' % err) + exit(1) + + kdamonds.kdamonds[0].commit() + + after_commit_status, err = \ + dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print('after-commit status dump failed: %s' % err) + exit(1) + + if before_commit_status != after_commit_status: + print(f'before: {json.dumps(before_commit_status, indent=2)}') + print(f'after: {json.dumps(after_commit_status, indent=2)}') + exit(1) + + kdamonds.stop() + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index d634d8395d90..585ecb4d5dc4 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only napi_id_helper +psp_responder diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 984ece05f7f9..bd3af9a34e2f 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -19,6 +19,7 @@ TEST_PROGS := \ netcons_sysdata.sh \ netpoll_basic.py \ ping.py \ + psp.py \ queues.py \ stats.py \ shaper.py \ @@ -26,4 +27,13 @@ TEST_PROGS := \ xdp.py \ # end of TEST_PROGS +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := psp_responder +TEST_GEN_FILES += $(YNL_GEN_FILES) + include ../../lib.mk + +# YNL build +YNL_GENS := psp + +include ../../net/ynl.mk diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 44b98f17f8ff..2f095cf67d9a 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -11,7 +11,9 @@ TEST_PROGS := \ bond_options.sh \ bond-eth-type-change.sh \ bond_macvlan_ipvlan.sh \ - bond_passive_lacp.sh + bond_passive_lacp.sh \ + bond_lacp_prio.sh + bond_ipsec_offload.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh new file mode 100755 index 000000000000..f09e100232c7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh @@ -0,0 +1,156 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# IPsec over bonding offload test: +# +# +----------------+ +# | bond0 | +# | | | +# | eth0 eth1 | +# +---+-------+----+ +# +# We use netdevsim instead of physical interfaces +#------------------------------------------------------------------- +# Example commands +# ip x s add proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 replay-window 32 \ +# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \ +# sel src 192.0.2.1/24 dst 192.0.2.2/24 +# offload dev bond0 dir out +# ip x p add dir out src 192.0.2.1/24 dst 192.0.2.2/24 \ +# tmpl proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 +# +#------------------------------------------------------------------- + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh +srcip=192.0.2.1 +dstip=192.0.2.2 +ipsec0=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ipsec +ipsec1=/sys/kernel/debug/netdevsim/netdevsim0/ports/1/ipsec +active_slave="" + +# shellcheck disable=SC2317 +active_slave_changed() +{ + local old_active_slave=$1 + local new_active_slave + + # shellcheck disable=SC2154 + new_active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + [ "$new_active_slave" != "$old_active_slave" ] && [ "$new_active_slave" != "null" ] +} + +test_offload() +{ + # use ping to exercise the Tx path + ip netns exec "$ns" ping -I bond0 -c 3 -W 1 -i 0 "$dstip" >/dev/null + + active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + + if [ "$active_slave" = "$nic0" ]; then + sysfs=$ipsec0 + elif [ "$active_slave" = "$nic1" ]; then + sysfs=$ipsec1 + else + check_err 1 "bond_ipsec_offload invalid active_slave $active_slave" + fi + + # The tx/rx order in sysfs may changed after failover + grep -q "SA count=2 tx=3" "$sysfs" && grep -q "tx ipaddr=$dstip" "$sysfs" + check_err $? "incorrect tx count with link ${active_slave}" + + log_test bond_ipsec_offload "active_slave ${active_slave}" +} + +setup_env() +{ + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + defer umount /sys/kernel/debug/ + + fi + + # setup netdevsim since dummy/veth dev doesn't have offload support + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + if ! modprobe -q netdevsim; then + echo "SKIP: can't load netdevsim for ipsec offload" + # shellcheck disable=SC2154 + exit "$ksft_skip" + fi + defer modprobe -r netdevsim + fi + + setup_ns ns + defer cleanup_ns "$ns" +} + +setup_bond() +{ + ip -n "$ns" link add bond0 type bond mode active-backup miimon 100 + ip -n "$ns" addr add "$srcip/24" dev bond0 + ip -n "$ns" link set bond0 up + + echo "0 2" | ip netns exec "$ns" tee /sys/bus/netdevsim/new_device >/dev/null + nic0=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | head -n 1) + nic1=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | tail -n 1) + ip -n "$ns" link set "$nic0" master bond0 + ip -n "$ns" link set "$nic1" master bond0 + + # we didn't create a peer, make sure we can Tx by adding a permanent + # neighbour this need to be added after enslave + ip -n "$ns" neigh add "$dstip" dev bond0 lladdr 00:11:22:33:44:55 + + # create offloaded SAs, both in and out + ip -n "$ns" x p add dir out src "$srcip/24" dst "$dstip/24" \ + tmpl proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x p add dir in src "$dstip/24" dst "$srcip/24" \ + tmpl proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x s add proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$srcip/24" dst "$dstip/24" \ + offload dev bond0 dir out + + ip -n "$ns" x s add proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$dstip/24" dst "$srcip/24" \ + offload dev bond0 dir in + + # does offload show up in ip output + lines=$(ip -n "$ns" x s list | grep -c "crypto offload parameters: dev bond0 dir") + if [ "$lines" -ne 2 ] ; then + check_err 1 "bond_ipsec_offload SA offload missing from list output" + fi +} + +trap defer_scopes_cleanup EXIT +setup_env +setup_bond + +# start Offload testing +test_offload + +# do failover and re-test +ip -n "$ns" link set "$active_slave" down +slowwait 5 active_slave_changed "$active_slave" +test_offload + +# make sure offload get removed from driver +ip -n "$ns" x s flush +ip -n "$ns" x p flush +line0=$(grep -c "SA count=0" "$ipsec0") +line1=$(grep -c "SA count=0" "$ipsec1") +[ "$line0" -ne 1 ] || [ "$line1" -ne 1 ] +check_fail $? "bond_ipsec_offload SA not removed from driver" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh new file mode 100755 index 000000000000..a483d505c6a8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing if bond lacp per port priority works +# +# Switch (s_ns) Backup Switch (b_ns) +# +-------------------------+ +-------------------------+ +# | bond0 | | bond0 | +# | + | | + | +# | eth0 | eth1 | | eth0 | eth1 | +# | +---+---+ | | +---+---+ | +# | | | | | | | | +# +-------------------------+ +-------------------------+ +# | | | | +# +-----------------------------------------------------+ +# | | | | | | +# | +-------+---------+---------+-------+ | +# | eth0 eth1 | eth2 eth3 | +# | + | +# | bond0 | +# +-----------------------------------------------------+ +# Client (c_ns) + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh + +setup_links() +{ + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}" + ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}" + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast ad_select actor_port_prio + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + ip -n "${c_ns}" link set eth2 master bond0 + ip -n "${c_ns}" link set eth3 master bond0 + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + ip -n "${b_ns}" link set eth0 master bond0 + ip -n "${b_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link set bond0 up + ip -n "${s_ns}" link set bond0 up + ip -n "${b_ns}" link set bond0 up +} + +test_port_prio_setting() +{ + RET=0 + ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 1000 ] && RET=1 + ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 10 ] && RET=1 +} + +test_agg_reselect() +{ + local bond_agg_id slave_agg_id + local expect_slave="$1" + RET=0 + + # Trigger link state change to reselect the aggregator + ip -n "${c_ns}" link set eth1 down + sleep 0.5 + ip -n "${c_ns}" link set eth1 up + sleep 0.5 + + bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.ad_info.aggregator") + slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \ + ".[].linkinfo.info_slave_data.ad_aggregator_id") + # shellcheck disable=SC2034 + [ "${bond_agg_id}" -ne "${slave_agg_id}" ] && \ + RET=1 +} + +trap cleanup_all_ns EXIT +setup_ns c_ns s_ns b_ns +setup_links + +test_port_prio_setting +log_test "bond 802.3ad" "actor_port_prio setting" + +test_agg_reselect eth0 +log_test "bond 802.3ad" "actor_port_prio select" + +# Change the actor port prio and re-test +ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10 +ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000 +test_agg_reselect eth2 +log_test "bond 802.3ad" "actor_port_prio switch" + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 832fa1caeb66..e5b7a8db4dfa 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -11,3 +11,7 @@ CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y CONFIG_VLAN_8021Q=m +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y +CONFIG_XFRM_USER=m +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index f27172ddee0a..601431248d5b 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -1,6 +1,9 @@ +CONFIG_CONFIGFS_FS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n +CONFIG_INET_PSP=y CONFIG_IPV6=y CONFIG_NETDEVSIM=m -CONFIG_CONFIGFS_FS=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 7c90a040ce45..a2011474e625 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -3,6 +3,7 @@ import errno import os +from typing import Union from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv @@ -58,7 +59,39 @@ def get_hds_thresh(cfg, netnl) -> None: if 'hds-thresh' not in rings: raise KsftSkipEx('hds-thresh not supported by device') + +def _hds_reset(cfg, netnl, rings) -> None: + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + + arg = {'header': {'dev-index': cfg.ifindex}} + if cur.get('tcp-data-split') != rings.get('tcp-data-split'): + # Try to reset to "unknown" first, we don't know if the setting + # was the default or user chose it. Default seems more likely. + arg['tcp-data-split'] = "unknown" + netnl.rings_set(arg) + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if cur['tcp-data-split'] == rings['tcp-data-split']: + del arg['tcp-data-split'] + else: + # Try the explicit setting + arg['tcp-data-split'] = rings['tcp-data-split'] + if cur.get('hds-thresh') != rings.get('hds-thresh'): + arg['hds-thresh'] = rings['hds-thresh'] + if len(arg) > 1: + netnl.rings_set(arg) + + +def _defer_reset_hds(cfg, netnl) -> Union[dict, None]: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' in rings or 'tcp-data-split' in rings: + defer(_hds_reset, cfg, netnl, rings) + except NlError as e: + pass + + def set_hds_enable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) except NlError as e: @@ -76,6 +109,7 @@ def set_hds_enable(cfg, netnl) -> None: ksft_eq('enabled', rings['tcp-data-split']) def set_hds_disable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) except NlError as e: @@ -93,6 +127,7 @@ def set_hds_disable(cfg, netnl) -> None: ksft_eq('disabled', rings['tcp-data-split']) def set_hds_thresh_zero(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) except NlError as e: @@ -110,6 +145,7 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) def set_hds_thresh_random(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -140,6 +176,7 @@ def set_hds_thresh_random(cfg, netnl) -> None: ksft_eq(hds_thresh, rings['hds-thresh']) def set_hds_thresh_max(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -157,6 +194,7 @@ def set_hds_thresh_max(cfg, netnl) -> None: ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) def set_hds_thresh_gt(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -178,6 +216,7 @@ def set_xdp(cfg, netnl) -> None: """ mode = _get_hds_mode(cfg, netnl) if mode == 'enabled': + _defer_reset_hds(cfg, netnl) netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'unknown'}) diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index fdc97355588c..ee09a40d532c 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -15,9 +15,11 @@ TEST_PROGS = \ iou-zcrx.py \ irq.py \ loopback.sh \ + nic_timestamp.py \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ + rss_flow_label.py \ rss_input_xfrm.py \ tso.py \ xsk_reconfig.py \ diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index 88ae719e6f8f..e8a06aa1471c 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -1,5 +1,7 @@ +CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y +CONFIG_UDMABUF=y CONFIG_VXLAN=y diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index baa2f24240ba..45c2d49d55b6 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -24,7 +24,7 @@ def check_rx(cfg) -> None: require_devmem(cfg) port = rand_port() - socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}" + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" with bkg(listen_cmd, exit_wait=True) as ncdevmem: @@ -42,9 +42,9 @@ def check_tx(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") @@ -56,9 +56,9 @@ def check_tx_chunks(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd, exit_wait=True) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 1462a339a74b..0ceb297e7757 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -13,7 +13,7 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily, DevlinkFamily + NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ rand_port, tool, wait_port_listen @@ -22,7 +22,7 @@ try: from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ - ksft_ne, ksft_not_in, ksft_raises, ksft_true + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none from net.lib.py import NetNSEnter from drivers.net.lib.py import GenerateTraffic from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 72f828021f83..3288ed04ce08 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -39,6 +39,7 @@ #define __EXPORTED_HEADERS__ #include <linux/uio.h> +#include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -97,6 +98,10 @@ static unsigned int dmabuf_id; static uint32_t tx_dmabuf_id; static int waittime_ms = 500; +/* System state loaded by current_config_load() */ +#define MAX_FLOWS 8 +static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, }; + struct memory_buffer { int fd; size_t size; @@ -115,6 +120,21 @@ struct memory_provider { size_t off, int n); }; +static void pr_err(const char *fmt, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", TEST_PREFIX); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + if (errno != 0) + fprintf(stderr, ": %s", strerror(errno)); + fprintf(stderr, "\n"); +} + static struct memory_buffer *udmabuf_alloc(size_t size) { struct udmabuf_create create; @@ -123,27 +143,33 @@ static struct memory_buffer *udmabuf_alloc(size_t size) ctx = malloc(sizeof(*ctx)); if (!ctx) - error(1, ENOMEM, "malloc failed"); + return NULL; ctx->size = size; ctx->devfd = open("/dev/udmabuf", O_RDWR); - if (ctx->devfd < 0) - error(1, errno, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); + if (ctx->devfd < 0) { + pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]"); + goto err_free_ctx; + } ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (ctx->memfd < 0) - error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + if (ctx->memfd < 0) { + pr_err("[skip,no-memfd]"); + goto err_close_dev; + } ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + if (ret < 0) { + pr_err("[skip,fcntl-add-seals]"); + goto err_close_memfd; + } ret = ftruncate(ctx->memfd, size); - if (ret == -1) - error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + if (ret == -1) { + pr_err("[FAIL,memfd-truncate]"); + goto err_close_memfd; + } memset(&create, 0, sizeof(create)); @@ -151,15 +177,29 @@ static struct memory_buffer *udmabuf_alloc(size_t size) create.offset = 0; create.size = size; ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); - if (ctx->fd < 0) - error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + if (ctx->fd < 0) { + pr_err("[FAIL, create udmabuf]"); + goto err_close_fd; + } ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, 0); - if (ctx->buf_mem == MAP_FAILED) - error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + if (ctx->buf_mem == MAP_FAILED) { + pr_err("[FAIL, map udmabuf]"); + goto err_close_fd; + } return ctx; + +err_close_fd: + close(ctx->fd); +err_close_memfd: + close(ctx->memfd); +err_close_dev: + close(ctx->devfd); +err_free_ctx: + free(ctx); + return NULL; } static void udmabuf_free(struct memory_buffer *ctx) @@ -217,7 +257,7 @@ static void print_nonzero_bytes(void *ptr, size_t size) putchar(p[i]); } -void validate_buffer(void *line, size_t size) +int validate_buffer(void *line, size_t size) { static unsigned char seed = 1; unsigned char *ptr = line; @@ -232,8 +272,10 @@ void validate_buffer(void *line, size_t size) "Failed validation: expected=%u, actual=%u, index=%lu\n", expected, ptr[i], i); errors++; - if (errors > 20) - error(1, 0, "validation failed."); + if (errors > 20) { + pr_err("validation failed"); + return -1; + } } seed++; if (seed == do_validation) @@ -241,6 +283,86 @@ void validate_buffer(void *line, size_t size) } fprintf(stdout, "Validated buffer\n"); + return 0; +} + +static int +__run_command(char *out, size_t outlen, const char *cmd, va_list args) +{ + char command[256]; + FILE *fp; + + vsnprintf(command, sizeof(command), cmd, args); + + fprintf(stderr, "Running: %s\n", command); + fp = popen(command, "r"); + if (!fp) + return -1; + if (out) { + size_t len; + + if (!fgets(out, outlen, fp)) + return -1; + + /* Remove trailing newline if present */ + len = strlen(out); + if (len && out[len - 1] == '\n') + out[len - 1] = '\0'; + } + return pclose(fp); +} + +static int run_command(const char *cmd, ...) +{ + va_list args; + int ret; + + va_start(args, cmd); + ret = __run_command(NULL, 0, cmd, args); + va_end(args); + + return ret; +} + +static int ethtool_add_flow(const char *format, ...) +{ + char local_output[256], cmd[256]; + const char *id_start; + int flow_idx, ret; + char *endptr; + long flow_id; + va_list args; + + for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++) + if (ntuple_ids[flow_idx] == -1) + break; + if (flow_idx == MAX_FLOWS) { + fprintf(stderr, "Error: too many flows\n"); + return -1; + } + + snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format); + + va_start(args, format); + ret = __run_command(local_output, sizeof(local_output), cmd, args); + va_end(args); + + if (ret != 0) + return ret; + + /* Extract the ID from the output */ + id_start = strstr(local_output, "Added rule with ID "); + if (!id_start) + return -1; + id_start += strlen("Added rule with ID "); + + flow_id = strtol(id_start, &endptr, 10); + if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX) + return -1; + + fprintf(stderr, "Added flow rule with ID %ld\n", flow_id); + ntuple_ids[flow_idx] = flow_id; + return flow_id; } static int rxq_num(int ifindex) @@ -270,29 +392,17 @@ static int rxq_num(int ifindex) return num; } -#define run_command(cmd, ...) \ - ({ \ - char command[256]; \ - memset(command, 0, sizeof(command)); \ - snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - fprintf(stderr, "Running: %s\n", command); \ - system(command); \ - }) - -static int reset_flow_steering(void) +static void reset_flow_steering(void) { - /* Depending on the NIC, toggling ntuple off and on might not - * be allowed. Additionally, attempting to delete existing filters - * will fail if no filters are present. Therefore, do not enforce - * the exit status. - */ - - run_command("sudo ethtool -K %s ntuple off >&2", ifname); - run_command("sudo ethtool -K %s ntuple on >&2", ifname); - run_command( - "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", - ifname, ifname); - return 0; + int i; + + for (i = 0; i < MAX_FLOWS; i++) { + if (ntuple_ids[i] == -1) + continue; + run_command("ethtool -N %s delete %d", + ifname, ntuple_ids[i]); + ntuple_ids[i] = -1; + } } static const char *tcp_data_split_str(int val) @@ -309,7 +419,81 @@ static const char *tcp_data_split_str(int val) } } -static int configure_headersplit(bool on) +static struct ethtool_rings_get_rsp *get_ring_config(void) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return NULL; + } + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + ynl_sock_destroy(ys); + + return get_rsp; +} + +static void restore_ring_config(const struct ethtool_rings_get_rsp *config) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + if (!config) + return; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + if (config->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh); + + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg); + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + /* use explicit value if UKNOWN didn't give us the previous */ + if (get_rsp->tcp_data_split != config->tcp_data_split) { + ethtool_rings_set_req_set_tcp_data_split(req, + config->tcp_data_split); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring expl HDS cfg: %s\n", + ys->err.msg); + } + + ethtool_rings_get_rsp_free(get_rsp); + ethtool_rings_set_req_free(req); + + ynl_sock_destroy(ys); +} + +static int +configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on) { struct ethtool_rings_get_req *get_req; struct ethtool_rings_get_rsp *get_rsp; @@ -326,8 +510,15 @@ static int configure_headersplit(bool on) req = ethtool_rings_set_req_alloc(); ethtool_rings_set_req_set_header_dev_index(req, ifindex); - /* 0 - off, 1 - auto, 2 - on */ - ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0); + if (on) { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_ENABLED); + if (old->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, 0); + } else { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + } ret = ethtool_rings_set(ys, req); if (ret < 0) fprintf(stderr, "YNL failed: %s\n", ys->err.msg); @@ -351,12 +542,103 @@ static int configure_headersplit(bool on) static int configure_rss(void) { - return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); + return run_command("ethtool -X %s equal %d >&2", ifname, start_queue); +} + +static void reset_rss(void) +{ + run_command("ethtool -X %s default >&2", ifname, start_queue); } -static int configure_channels(unsigned int rx, unsigned int tx) +static int check_changing_channels(unsigned int rx, unsigned int tx) { - return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); + struct ethtool_channels_get_req *gchan; + struct ethtool_channels_set_req *schan; + struct ethtool_channels_get_rsp *chan; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx); + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + gchan = ethtool_channels_get_req_alloc(); + if (!gchan) { + ret = -1; + goto exit_close_sock; + } + + ethtool_channels_get_req_set_header_dev_index(gchan, ifindex); + chan = ethtool_channels_get(ys, gchan); + ethtool_channels_get_req_free(gchan); + if (!chan) { + fprintf(stderr, "YNL get channels: %s\n", ys->err.msg); + ret = -1; + goto exit_close_sock; + } + + schan = ethtool_channels_set_req_alloc(); + if (!schan) { + ret = -1; + goto exit_free_chan; + } + + ethtool_channels_set_req_set_header_dev_index(schan, ifindex); + + if (chan->_present.combined_count) { + if (chan->_present.rx_count || chan->_present.tx_count) { + ethtool_channels_set_req_set_rx_count(schan, 0); + ethtool_channels_set_req_set_tx_count(schan, 0); + } + + if (rx == tx) { + ethtool_channels_set_req_set_combined_count(schan, rx); + } else if (rx > tx) { + ethtool_channels_set_req_set_combined_count(schan, tx); + ethtool_channels_set_req_set_rx_count(schan, rx - tx); + } else { + ethtool_channels_set_req_set_combined_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx - rx); + } + + } else if (chan->_present.rx_count) { + ethtool_channels_set_req_set_rx_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx); + } else { + fprintf(stderr, "Error: device has neither combined nor rx channels\n"); + ret = -1; + goto exit_free_schan; + } + + ret = ethtool_channels_set(ys, schan); + if (ret) { + fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); + } else { + /* We were expecting a failure, go back to previous settings */ + ethtool_channels_set_req_set_combined_count(schan, + chan->combined_count); + ethtool_channels_set_req_set_rx_count(schan, chan->rx_count); + ethtool_channels_set_req_set_tx_count(schan, chan->tx_count); + + ret = ethtool_channels_set(ys, schan); + if (ret) + fprintf(stderr, "YNL un-setting channels: %s\n", + ys->err.msg); + } + +exit_free_schan: + ethtool_channels_set_req_free(schan); +exit_free_chan: + ethtool_channels_get_rsp_free(chan); +exit_close_sock: + ynl_sock_destroy(ys); + + return ret; } static int configure_flow_steering(struct sockaddr_in6 *server_sin) @@ -364,6 +646,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) const char *type = "tcp6"; const char *server_addr; char buf[40]; + int flow_id; inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); server_addr = buf; @@ -374,23 +657,22 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) } /* Try configure 5-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", - ifname, - type, - client_ip ? "src-ip" : "", - client_ip ?: "", - server_addr, - client_ip ? "src-port" : "", - client_ip ? port : "", - port, start_queue)) + flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d", + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); + if (flow_id < 0) { /* If that fails, try configure 3-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2", - ifname, - type, - server_addr, - port, start_queue)) + flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d", + type, server_addr, port, start_queue); + if (flow_id < 0) /* If that fails, return error */ return -1; + } return 0; } @@ -405,6 +687,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, *ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!*ys) { + netdev_queue_id_free(queues); fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } @@ -483,18 +766,24 @@ err_close: return -1; } -static void enable_reuseaddr(int fd) +static int enable_reuseaddr(int fd) { int opt = 1; int ret; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEPORT failed"); + return -1; + } ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEADDR failed"); + return -1; + } + + return 0; } static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) @@ -537,6 +826,7 @@ static struct netdev_queue_id *create_queues(void) static int do_server(struct memory_buffer *mem) { + struct ethtool_rings_get_rsp *ring_config; char ctrl_data[sizeof(int) * 20000]; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; @@ -548,54 +838,72 @@ static int do_server(struct memory_buffer *mem) char *tmp_mem = NULL; struct ynl_sock *ys; char iobuf[819200]; + int ret, err = -1; char buffer[256]; int socket_fd; int client_fd; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } - if (reset_flow_steering()) - error(1, 0, "Failed to reset flow steering\n"); + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + return -1; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to enable TCP header split\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to enable TCP header split"); + goto err_free_ring_config; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "Failed to configure rss\n"); + if (configure_rss()) { + pr_err("Failed to configure rss"); + goto err_reset_headersplit; + } /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering(&server_sin)) - error(1, 0, "Failed to configure flow steering\n"); - - sleep(1); + if (configure_flow_steering(&server_sin)) { + pr_err("Failed to configure flow steering"); + goto err_reset_rss; + } - if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_flow_steering; + } tmp_mem = malloc(mem->size); if (!tmp_mem) - error(1, ENOMEM, "malloc failed"); + goto err_unbind; socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + if (socket_fd < 0) { + pr_err("Failed to create socket"); + goto err_free_tmp; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; fprintf(stderr, "binding to address %s:%d\n", server_ip, ntohs(server_sin.sin6_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to bind"); + goto err_close_socket; + } ret = listen(socket_fd, 1); - if (ret) - error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to listen"); + goto err_close_socket; + } client_addr_len = sizeof(client_addr); @@ -604,6 +912,10 @@ static int do_server(struct memory_buffer *mem) fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, ntohs(server_sin.sin6_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); + if (client_fd < 0) { + pr_err("Failed to accept"); + goto err_close_socket; + } inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, sizeof(buffer)); @@ -631,10 +943,15 @@ static int do_server(struct memory_buffer *mem) continue; if (ret < 0) { perror("recvmsg"); + if (errno == EFAULT) { + pr_err("received EFAULT, won't recover"); + goto err_close_client; + } continue; } if (ret == 0) { - fprintf(stderr, "client exited\n"); + errno = 0; + pr_err("client exited"); goto cleanup; } @@ -672,9 +989,10 @@ static int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, total_received, dmabuf_cmsg->dmabuf_id); - if (dmabuf_cmsg->dmabuf_id != dmabuf_id) - error(1, 0, - "received on wrong dmabuf_id: flow steering error\n"); + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) { + pr_err("received on wrong dmabuf_id: flow steering error"); + goto err_close_client; + } if (dmabuf_cmsg->frag_size % getpagesize()) non_page_aligned_frags++; @@ -685,22 +1003,27 @@ static int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size); - if (do_validation) - validate_buffer(tmp_mem, - dmabuf_cmsg->frag_size); - else + if (do_validation) { + if (validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size)) + goto err_close_client; + } else { print_nonzero_bytes(tmp_mem, dmabuf_cmsg->frag_size); + } ret = setsockopt(client_fd, SOL_SOCKET, SO_DEVMEM_DONTNEED, &token, sizeof(token)); - if (ret != 1) - error(1, 0, - "SO_DEVMEM_DONTNEED not enough tokens"); + if (ret != 1) { + pr_err("SO_DEVMEM_DONTNEED not enough tokens"); + goto err_close_client; + } + } + if (!is_devmem) { + pr_err("flow steering error"); + goto err_close_client; } - if (!is_devmem) - error(1, 0, "flow steering error\n"); fprintf(stderr, "total_received=%lu\n", total_received); } @@ -711,54 +1034,121 @@ static int do_server(struct memory_buffer *mem) page_aligned_frags, non_page_aligned_frags); cleanup: + err = 0; - free(tmp_mem); +err_close_client: close(client_fd); +err_close_socket: close(socket_fd); +err_free_tmp: + free(tmp_mem); +err_unbind: ynl_sock_destroy(ys); - - return 0; +err_reset_flow_steering: + reset_flow_steering(); +err_reset_rss: + reset_rss(); +err_reset_headersplit: + restore_ring_config(ring_config); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); + return err; } -void run_devmem_tests(void) +int run_devmem_tests(void) { + struct ethtool_rings_get_rsp *ring_config; + struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; + int err = -1; mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + goto err_free_mem; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "rss error\n"); + if (configure_rss()) { + pr_err("rss error"); + goto err_free_ring_config; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_rss; + } + + queues = netdev_queue_id_alloc(num_queues); + if (!queues) { + pr_err("Failed to allocate empty queues array"); + goto err_reset_headersplit; + } + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Binding empty queues array should have failed"); + goto err_unbind; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + if (configure_headersplit(ring_config, 0)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } - if (!bind_rx_queue(ifindex, mem->fd, - calloc(num_queues, sizeof(struct netdev_queue_id)), - num_queues, &ys)) - error(1, 0, "Binding empty queues array should have failed\n"); + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } - if (configure_headersplit(0)) - error(1, 0, "Failed to configure header split\n"); + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Configure dmabuf with header split off should have failed"); + goto err_unbind; + } - if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Configure dmabuf with header split off should have failed\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } - if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_headersplit; + } /* Deactivating a bound queue should not be legal */ - if (!configure_channels(num_queues, num_queues - 1)) - error(1, 0, "Deactivating a bound queue should be illegal.\n"); + if (!check_changing_channels(num_queues, num_queues)) { + pr_err("Deactivating a bound queue should be illegal"); + goto err_unbind; + } - /* Closing the netlink socket does an implicit unbind */ - ynl_sock_destroy(ys); + err = 0; + goto err_unbind; +err_unbind: + ynl_sock_destroy(ys); +err_reset_headersplit: + restore_ring_config(ring_config); +err_reset_rss: + reset_rss(); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); +err_free_mem: provider->free(mem); + return err; } static uint64_t gettimeofday_ms(void) @@ -778,13 +1168,15 @@ static int do_poll(int fd) pfd.fd = fd; ret = poll(&pfd, 1, waittime_ms); - if (ret == -1) - error(1, errno, "poll"); + if (ret == -1) { + pr_err("poll"); + return -1; + } return ret && (pfd.revents & POLLERR); } -static void wait_compl(int fd) +static int wait_compl(int fd) { int64_t tstop = gettimeofday_ms() + waittime_ms; char control[CMSG_SPACE(100)] = {}; @@ -798,18 +1190,23 @@ static void wait_compl(int fd) msg.msg_controllen = sizeof(control); while (gettimeofday_ms() < tstop) { - if (!do_poll(fd)) + ret = do_poll(fd); + if (ret < 0) + return ret; + if (!ret) continue; ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret < 0) { if (errno == EAGAIN) continue; - error(1, errno, "recvmsg(MSG_ERRQUEUE)"); - return; + pr_err("recvmsg(MSG_ERRQUEUE)"); + return -1; + } + if (msg.msg_flags & MSG_CTRUNC) { + pr_err("MSG_CTRUNC"); + return -1; } - if (msg.msg_flags & MSG_CTRUNC) - error(1, 0, "MSG_CTRUNC\n"); for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_IP && @@ -823,20 +1220,25 @@ static void wait_compl(int fd) continue; serr = (void *)CMSG_DATA(cm); - if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) - error(1, 0, "wrong origin %u", serr->ee_origin); - if (serr->ee_errno != 0) - error(1, 0, "wrong errno %d", serr->ee_errno); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + pr_err("wrong origin %u", serr->ee_origin); + return -1; + } + if (serr->ee_errno != 0) { + pr_err("wrong errno %d", serr->ee_errno); + return -1; + } hi = serr->ee_data; lo = serr->ee_info; fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); - return; + return 0; } } - error(1, 0, "did not receive tx completion"); + pr_err("did not receive tx completion"); + return -1; } static int do_client(struct memory_buffer *mem) @@ -850,50 +1252,69 @@ static int do_client(struct memory_buffer *mem) ssize_t line_size = 0; struct cmsghdr *cmsg; char *line = NULL; + int ret, err = -1; size_t len = 0; int socket_fd; __u32 ddmabuf; int opt = 1; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + if (client_ip) { + ret = parse_address(client_ip, atoi(port), &client_sin); + if (ret < 0) { + pr_err("parse client address"); + return ret; + } + } socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, socket_fd, "create socket"); + if (socket_fd < 0) { + pr_err("create socket"); + return -1; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname) + 1); - if (ret) - error(1, errno, "bindtodevice"); + if (ret) { + pr_err("bindtodevice"); + goto err_close_socket; + } - if (bind_tx_queue(ifindex, mem->fd, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_tx_queue(ifindex, mem->fd, &ys)) { + pr_err("Failed to bind"); + goto err_close_socket; + } if (client_ip) { - ret = parse_address(client_ip, atoi(port), &client_sin); - if (ret < 0) - error(1, 0, "parse client address"); - ret = bind(socket_fd, &client_sin, sizeof(client_sin)); - if (ret) - error(1, errno, "bind"); + if (ret) { + pr_err("bind"); + goto err_unbind; + } } ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); - if (ret) - error(1, errno, "set sock opt"); + if (ret) { + pr_err("set sock opt"); + goto err_unbind; + } fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, ntohs(server_sin.sin6_port), ifname); ret = connect(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "connect"); + if (ret) { + pr_err("connect"); + goto err_unbind; + } while (1) { free(line); @@ -906,10 +1327,11 @@ static int do_client(struct memory_buffer *mem) if (max_chunk) { msg.msg_iovlen = (line_size + max_chunk - 1) / max_chunk; - if (msg.msg_iovlen > MAX_IOV) - error(1, 0, - "can't partition %zd bytes into maximum of %d chunks", - line_size, MAX_IOV); + if (msg.msg_iovlen > MAX_IOV) { + pr_err("can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV); + goto err_free_line; + } for (int i = 0; i < msg.msg_iovlen; i++) { iov[i].iov_base = (void *)(i * max_chunk); @@ -940,34 +1362,40 @@ static int do_client(struct memory_buffer *mem) *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); - if (ret < 0) - error(1, errno, "Failed sendmsg"); + if (ret < 0) { + pr_err("Failed sendmsg"); + goto err_free_line; + } fprintf(stderr, "sendmsg_ret=%d\n", ret); - if (ret != line_size) - error(1, errno, "Did not send all bytes %d vs %zd", ret, - line_size); + if (ret != line_size) { + pr_err("Did not send all bytes %d vs %zd", ret, line_size); + goto err_free_line; + } - wait_compl(socket_fd); + if (wait_compl(socket_fd)) + goto err_free_line; } fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); + err = 0; + +err_free_line: free(line); +err_unbind: + ynl_sock_destroy(ys); +err_close_socket: close(socket_fd); - - if (ys) - ynl_sock_destroy(ys); - - return 0; + return err; } int main(int argc, char *argv[]) { struct memory_buffer *mem; int is_server = 0, opt; - int ret; + int ret, err = 1; while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { @@ -1004,8 +1432,10 @@ int main(int argc, char *argv[]) } } - if (!ifname) - error(1, 0, "Missing -f argument\n"); + if (!ifname) { + pr_err("Missing -f argument"); + return 1; + } ifindex = if_nametoindex(ifname); @@ -1014,33 +1444,41 @@ int main(int argc, char *argv[]) if (!server_ip && !client_ip) { if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 0) - error(1, 0, "couldn't detect number of queues\n"); - if (num_queues < 2) - error(1, 0, - "number of device queues is too low\n"); + if (num_queues < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } /* make sure can bind to multiple queues */ start_queue = num_queues / 2; num_queues /= 2; } - if (start_queue < 0 || num_queues < 0) - error(1, 0, "Both -t and -q are required\n"); + if (start_queue < 0 || num_queues < 0) { + pr_err("Both -t and -q are required"); + return 1; + } - run_devmem_tests(); - return 0; + return run_devmem_tests(); } if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 2) - error(1, 0, "number of device queues is too low\n"); + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } num_queues = 1; start_queue = rxq_num(ifindex) - num_queues; - if (start_queue < 0) - error(1, 0, "couldn't detect number of queues\n"); + if (start_queue < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); } @@ -1048,21 +1486,39 @@ int main(int argc, char *argv[]) for (; optind < argc; optind++) fprintf(stderr, "extra arguments: %s\n", argv[optind]); - if (start_queue < 0) - error(1, 0, "Missing -t argument\n"); + if (start_queue < 0) { + pr_err("Missing -t argument"); + return 1; + } - if (num_queues < 0) - error(1, 0, "Missing -q argument\n"); + if (num_queues < 0) { + pr_err("Missing -q argument"); + return 1; + } - if (!server_ip) - error(1, 0, "Missing -s argument\n"); + if (!server_ip) { + pr_err("Missing -s argument"); + return 1; + } - if (!port) - error(1, 0, "Missing -p argument\n"); + if (!port) { + pr_err("Missing -p argument"); + return 1; + } mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return 1; + } + ret = is_server ? do_server(mem) : do_client(mem); - provider->free(mem); + if (ret) + goto err_free_mem; - return ret; + err = 0; + +err_free_mem: + provider->free(mem); + return err; } diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py new file mode 100755 index 000000000000..c1e943d53f19 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests related to configuration of HW timestamping +""" + +import errno +from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx +from lib.py import NetDrvEnv, EthtoolFamily, NlError + + +def __get_hwtimestamp_support(cfg): + """ Retrieve supported configuration information """ + + try: + tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported") from e + raise + + ctx = {} + tx = tsinfo.get('tx-types', {}) + rx = tsinfo.get('rx-filters', {}) + + bits = tx.get('bits', {}) + ctx['tx'] = bits.get('bit', []) + bits = rx.get('bits', {}) + ctx['rx'] = bits.get('bit', []) + return ctx + + +def __get_hwtimestamp_config(cfg): + """ Retrieve current TS configuration information """ + + try: + tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return tscfg + + +def __set_hwtimestamp_config(cfg, ts): + """ Setup new TS configuration information """ + + ts['header'] = {'dev-name': cfg.ifname} + try: + res = cfg.ethnl.tsconfig_set(ts) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return res + + +def test_hwtstamp_tx(cfg): + """ + Test TX timestamp configuration. + The driver should apply provided config and report back proper state. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + tx = ts['tx'] + for t in tx: + tscfg = orig_tscfg + tscfg['tx-types']['bits']['bit'] = [t] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + ksft_eq(res['tx-types']['bits']['bit'], [t]) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def test_hwtstamp_rx(cfg): + """ + Test RX timestamp configuration. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + rx = ts['rx'] + for r in rx: + tscfg = orig_tscfg + tscfg['rx-filters']['bits']['bit'] = [r] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + if r['index'] == 0 or r['index'] == 1: + ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + else: + # the driver can fallback to some value which has higher coverage for timestamping + ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 7bb552f8b182..ed7e405682f0 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -118,7 +118,7 @@ def test_rss_key_indir(cfg): qcnt = len(_get_rx_cnts(cfg)) if qcnt < 3: - KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") + raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -178,8 +178,13 @@ def test_rss_key_indir(cfg): cnts = _get_rx_cnts(cfg) GenerateTraffic(cfg).wait_pkts_and_stop(20000) cnts = _get_rx_cnts(cfg, prev=cnts) - # First two queues get less traffic than all the rest - ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts)) + if qcnt > 4: + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), + "traffic distributed: " + str(cnts)) + else: + # When queue count is low make sure third queue got significant pkts + ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts)) def test_rss_queue_reconfigure(cfg, main_ctx=True): @@ -335,19 +340,20 @@ def test_hitless_key_update(cfg): data = get_rss(cfg) key_len = len(data['rss-hash-key']) - key = _rss_key_rand(key_len) + ethnl = EthtoolFamily() + key = random.randbytes(key_len) tgen = GenerateTraffic(cfg) try: errors0, carrier0 = get_drop_err_sum(cfg) t0 = datetime.datetime.now() - ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key}) t1 = datetime.datetime.now() errors1, carrier1 = get_drop_err_sum(cfg) finally: tgen.wait_pkts_and_stop(5000) - ksft_lt((t1 - t0).total_seconds(), 0.2) + ksft_lt((t1 - t0).total_seconds(), 0.15) ksft_eq(errors1 - errors1, 0) ksft_eq(carrier1 - carrier0, 0) diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py new file mode 100755 index 000000000000..6fa95fe27c47 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for RSS hashing on IPv6 Flow Label. +""" + +import glob +import os +import socket +from lib.py import CmdExitFailure +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ + ksft_not_in, ksft_raises, KsftSkipEx +from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port +from lib.py import NetDrvEpEnv + + +def _check_system(cfg): + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: + try: + with open(f, 'r') as fp: + setting = fp.read().strip() + # CPU mask will be zeros and commas + if setting.replace("0", "").replace(",", ""): + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") + except FileNotFoundError: + pass + + # 1 is the default, if someone changed it we probably shouldn"t mess with it + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout + if af.strip() != "1": + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") + + +def _ethtool_get_cfg(cfg, fl_type): + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + "IPv6 Flow Label": "l", + } + + ret = "" + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + ret += converter[line] + return ret + + +def _traffic(cfg, one_sock, one_cpu): + local_port = rand_port(socket.SOCK_DGRAM) + remote_port = rand_port(socket.SOCK_DGRAM) + + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v["6"], 0)) + if one_sock: + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" + else: + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" + + cpus = set() + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): + for _ in range(20): + fd_read_timeout(sock.fileno(), 1) + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + cpus.add(cpu) + + if one_cpu: + ksft_eq(len(cpus), 1, + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") + else: + ksft_ge(len(cpus), 2, + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") + + +def test_rss_flow_label(cfg): + """ + Test hashing on IPv6 flow label. Send traffic over a single socket + and over multiple sockets. Depend on the remote having auto-label + enabled so that it randomizes the label per socket. + """ + + cfg.require_ipver("6") + cfg.require_cmd("socat", remote=True) + _check_system(cfg) + + # Enable flow label hashing for UDP6 + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _traffic(cfg, one_sock=True, one_cpu=True) + _traffic(cfg, one_sock=False, one_cpu=False) + + # Disable it, we should see no hashing (reset was already defer()ed) + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + + _traffic(cfg, one_sock=False, one_cpu=True) + + +def _check_v4_flow_types(cfg): + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: + try: + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + ksft_not_in("Flow Label", cur, + comment=f"{fl_type=} has Flow Label:" + cur) + except CmdExitFailure: + # Probably does not support this flow type + pass + + +def test_rss_flow_label_6only(cfg): + """ + Test interactions with IPv4 flow types. It should not be possible to set + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also + not appear in the IPv4 "current config". + """ + + with ksft_raises(CmdExitFailure) as cm: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") + ksft_in("Invalid argument", cm.exception.cmd.stderr) + + _check_v4_flow_types(cfg) + + # Try to enable Flow Labels and check again, in case it leaks thru + initial = _ethtool_get_cfg(cfg, "udp6") + changed = initial.replace("l", "") if "l" in initial else initial + "l" + + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _check_v4_flow_types(cfg) + restore.exec() + _check_v4_flow_types(cfg) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_rss_flow_label, + test_rss_flow_label_6only], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index c13dd5efa27a..0998e68ebaf0 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -60,16 +60,17 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): sock_wait_drain(sock) qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - # No math behind the 10 here, but try to catch cases where - # TCP falls back to non-LSO. - ksft_lt(tcp_sock_get_retrans(sock), 10) - sock.close() - # Check that at least 90% of the data was sent as LSO packets. # System noise may cause false negatives. Also header overheads # will add up to 5% of extra packes... The check is best effort. total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + + # Make sure we have order of magnitude more LSO packets than + # retransmits, in case TCP retransmitted all the LSO packets. + ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4) + sock.close() + if should_lso: if cfg.have_stat_super_count: ksft_ge(qstat_new['tx-hw-gso-packets'] - diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 8711c67ad658..2a645415c4ca 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -12,16 +12,16 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily, DevlinkFamily + NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, tool, wait_port_listen + fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file from net.lib.py import fd_read_timeout from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ - ksft_ne, ksft_not_in, ksft_raises, ksft_true + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1b8bd648048f..01be3d9b9720 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,7 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx -from lib.py import ksft_setup +from lib.py import ksft_setup, wait_file from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -25,6 +25,9 @@ class NetDrvEnvBase: self.env = self._load_env_file() + # Following attrs must be set be inheriting classes + self.dev = None + def _load_env_file(self): env = os.environ.copy() @@ -48,6 +51,22 @@ class NetDrvEnvBase: env[pair[0]] = pair[1] return ksft_setup(env) + def __del__(self): + pass + + def __enter__(self): + ip(f"link set dev {self.dev['ifname']} up") + wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier", + lambda x: x.strip() == "1") + + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() + class NetDrvEnv(NetDrvEnvBase): """ @@ -72,17 +91,6 @@ class NetDrvEnv(NetDrvEnvBase): self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] - def __enter__(self): - ip(f"link set dev {self.dev['ifname']} up") - - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() @@ -219,15 +227,6 @@ class NetDrvEpEnv(NetDrvEnvBase): raise Exception("Can't resolve remote interface name, multiple interfaces match") return v6[0]["ifname"] if v6 else v4[0]["ifname"] - def __enter__(self): - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() @@ -246,6 +245,10 @@ class NetDrvEpEnv(NetDrvEnvBase): if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") + def require_nsim(self): + if self._ns is None: + raise KsftXfailEx("Test only works on netdevsim") + def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) if cached.get(key) is None: diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index b6071e80ebbb..8e1085e89647 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -148,12 +148,20 @@ function create_dynamic_target() { # Generate the command line argument for netconsole following: # netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr] function create_cmdline_str() { + local BINDMODE=${1:-"ifname"} + if [ "${BINDMODE}" == "ifname" ] + then + SRCDEV=${SRCIF} + else + SRCDEV=$(mac_get "${SRCIF}") + fi + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') SRCPORT="1514" TGTPORT="6666" - echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\"" + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\"" } # Do not append the release to the header of the message diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 29a672c2270f..e212ad8ccef6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -44,8 +44,7 @@ source $lib_dir/devlink_lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/24 - defer simple_if_fini $h1 192.0.2.1/24 + adf_simple_if_init $h1 192.0.2.1/24 mtu_set $h1 10000 defer mtu_restore $h1 @@ -56,8 +55,7 @@ h1_create() h2_create() { - simple_if_init $h2 198.51.100.1/24 - defer simple_if_fini $h2 198.51.100.1/24 + adf_simple_if_init $h2 198.51.100.1/24 mtu_set $h2 10000 defer mtu_restore $h2 @@ -106,8 +104,7 @@ setup_prepare() # Reload to ensure devlink-trap settings are back to default. defer devlink_reload - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index d5b6f2cc9a29..9ca340c5f3a6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -57,8 +57,7 @@ source qos_lib.sh h1_create() { - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 mtu_set $h1 10000 defer mtu_restore $h1 @@ -70,8 +69,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 10000 defer mtu_restore $h2 @@ -83,8 +81,7 @@ h2_create() h3_create() { - simple_if_init $h3 - defer simple_if_fini $h3 + adf_simple_if_init $h3 mtu_set $h3 10000 defer mtu_restore $h3 @@ -225,8 +222,7 @@ setup_prepare() h3mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh index 2b5d2c2751d5..a4a25637fe2a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh @@ -68,8 +68,7 @@ mlxsw_only_on_spectrum 2+ || exit h1_create() { - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 vlan_create $h1 111 v$h1 192.0.2.33/28 defer vlan_destroy $h1 111 @@ -78,8 +77,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 vlan_create $h2 111 v$h2 192.0.2.34/28 defer vlan_destroy $h2 111 @@ -178,8 +176,7 @@ setup_prepare() h2mac=$(mac_get $h2) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index cd4a5c21360c..d8f8ae8533cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -72,8 +72,7 @@ source qos_lib.sh h1_create() { - simple_if_init $h1 192.0.2.65/28 - defer simple_if_fini $h1 192.0.2.65/28 + adf_simple_if_init $h1 192.0.2.65/28 mtu_set $h1 10000 defer mtu_restore $h1 @@ -81,8 +80,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 10000 defer mtu_restore $h2 @@ -94,8 +92,7 @@ h2_create() h3_create() { - simple_if_init $h3 192.0.2.66/28 - defer simple_if_fini $h3 192.0.2.66/28 + adf_simple_if_init $h3 192.0.2.66/28 mtu_set $h3 10000 defer mtu_restore $h3 @@ -196,8 +193,7 @@ setup_prepare() h3mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 537d6baa77b7..47d2ffcf366e 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -100,8 +100,7 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev - defer simple_if_fini $dev + adf_simple_if_init $dev mtu_set $dev 10000 defer mtu_restore $dev @@ -250,8 +249,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index 9699a100a87d..f4be72b2145a 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -24,7 +24,8 @@ def _assert_napi_threaded_disabled(nl, napi_id) -> None: def _set_threaded_state(cfg, threaded) -> None: - cmd(f"echo {threaded} > /sys/class/net/{cfg.ifname}/threaded") + with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp: + fp.write(str(threaded).encode('utf-8')) def _setup_deferred_cleanup(cfg) -> None: @@ -38,6 +39,34 @@ def _setup_deferred_cleanup(cfg) -> None: return combined +def napi_init(cfg, nl) -> None: + """ + Test that threaded state (in the persistent NAPI config) gets updated + even when NAPI with given ID is not allocated at the time. + """ + + qcnt = _setup_deferred_cleanup(cfg) + + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 0) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ Test that when napi threaded is enabled at device level and @@ -103,7 +132,8 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, queue_count=2) as cfg: - ksft_run([change_num_queues, + ksft_run([napi_init, + change_num_queues, enable_dev_threaded_disable_napi_threaded], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh index ad2fb8b1c463..d1d23dc67f99 100755 --- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -19,9 +19,6 @@ check_netconsole_module modprobe netdevsim 2> /dev/null || true rmmod netconsole 2> /dev/null || true -# The content of kmsg will be save to the following file -OUTPUT_FILE="/tmp/${TARGET}" - # Check for basic system dependency and exit if not found # check_for_dependencies # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) @@ -30,23 +27,39 @@ echo "6 5" > /proc/sys/kernel/printk trap do_cleanup EXIT # Create one namespace and two interfaces set_network -# Create the command line for netconsole, with the configuration from the -# function above -CMDLINE="$(create_cmdline_str)" - -# Load the module, with the cmdline set -modprobe netconsole "${CMDLINE}" - -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" -# Make sure the message was received in the dst part -# and exit -validate_msg "${OUTPUT_FILE}" + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + + # Listed for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Unload the module + rmmod netconsole + echo "${BINDMODE} : Test passed" >&2 +done exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py new file mode 100755 index 000000000000..4ae7a785ff10 --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp.py @@ -0,0 +1,627 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Test suite for PSP capable drivers.""" + +import errno +import fcntl +import socket +import struct +import termios +import time + +from lib.py import defer +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_true, ksft_eq, ksft_ne, ksft_gt, ksft_raises +from lib.py import ksft_not_none +from lib.py import KsftSkipEx +from lib.py import NetDrvEpEnv, PSPFamily, NlError +from lib.py import bkg, rand_port, wait_port_listen + + +def _get_outq(s): + one = b'\0' * 4 + outq = fcntl.ioctl(s.fileno(), termios.TIOCOUTQ, one) + return struct.unpack("I", outq)[0] + + +def _send_with_ack(cfg, msg): + cfg.comm_sock.send(msg) + response = cfg.comm_sock.recv(4) + if response != b'ack\0': + raise RuntimeError("Unexpected server response", response) + + +def _remote_read_len(cfg): + cfg.comm_sock.send(b'read len\0') + return int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + + +def _make_clr_conn(cfg, ipver=None): + _send_with_ack(cfg, b'conn clr\0') + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _make_psp_conn(cfg, version=0, ipver=None): + _send_with_ack(cfg, b'conn psp\0' + struct.pack('BB', version, version)) + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _close_conn(cfg, s): + _send_with_ack(cfg, b'data close\0') + s.close() + + +def _close_psp_conn(cfg, s): + _close_conn(cfg, s) + + +def _spi_xchg(s, rx): + s.send(struct.pack('I', rx['spi']) + rx['key']) + tx = s.recv(4 + len(rx['key'])) + return { + 'spi': struct.unpack('I', tx[:4])[0], + 'key': tx[4:] + } + + +def _send_careful(cfg, s, rounds): + data = b'0123456789' * 200 + for i in range(rounds): + n = 0 + for _ in range(10): # allow 10 retries + try: + n += s.send(data[n:], socket.MSG_DONTWAIT) + if n == len(data): + break + except BlockingIOError: + time.sleep(0.05) + else: + rlen = _remote_read_len(cfg) + outq = _get_outq(s) + report = f'sent: {i * len(data) + n} remote len: {rlen} outq: {outq}' + raise RuntimeError(report) + + return len(data) * rounds + + +def _check_data_rx(cfg, exp_len): + read_len = -1 + for _ in range(30): + cfg.comm_sock.send(b'read len\0') + read_len = int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + if read_len == exp_len: + break + time.sleep(0.01) + ksft_eq(read_len, exp_len) + + +def _check_data_outq(s, exp_len, force_wait=False): + outq = 0 + for _ in range(10): + outq = _get_outq(s) + if not force_wait and outq == exp_len: + break + time.sleep(0.01) + ksft_eq(outq, exp_len) + +# +# Test case boiler plate +# + +def _init_psp_dev(cfg): + if not hasattr(cfg, 'psp_dev_id'): + # Figure out which local device we are testing against + for dev in cfg.pspnl.dev_get({}, dump=True): + if dev['ifindex'] == cfg.ifindex: + cfg.psp_info = dev + cfg.psp_dev_id = cfg.psp_info['id'] + break + else: + raise KsftSkipEx("No PSP devices found") + + # Enable PSP if necessary + cap = cfg.psp_info['psp-versions-cap'] + ena = cfg.psp_info['psp-versions-ena'] + if cap != ena: + cfg.pspnl.dev_set({'id': cfg.psp_dev_id, 'psp-versions-ena': cap}) + defer(cfg.pspnl.dev_set, {'id': cfg.psp_dev_id, + 'psp-versions-ena': ena }) + +# +# Test cases +# + +def dev_list_devices(cfg): + """ Dump all devices """ + _init_psp_dev(cfg) + + devices = cfg.pspnl.dev_get({}, dump=True) + + found = False + for dev in devices: + found |= dev['id'] == cfg.psp_dev_id + ksft_true(found) + + +def dev_get_device(cfg): + """ Get the device we intend to use """ + _init_psp_dev(cfg) + + dev = cfg.pspnl.dev_get({'id': cfg.psp_dev_id}) + ksft_eq(dev['id'], cfg.psp_dev_id) + + +def dev_get_device_bad(cfg): + """ Test getting device which doesn't exist """ + raised = False + try: + cfg.pspnl.dev_get({'id': 1234567}) + except NlError as e: + ksft_eq(e.nl_msg.error, -errno.ENODEV) + raised = True + ksft_true(raised) + + +def dev_rotate(cfg): + """ Test key rotation """ + _init_psp_dev(cfg) + + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + + +def dev_rotate_spi(cfg): + """ Test key rotation and SPI check """ + _init_psp_dev(cfg) + + top_a = top_b = 0 + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc_a = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_a = assoc_a['rx-key']['spi'] >> 31 + s.close() + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + ksft_eq(rot['id'], cfg.psp_dev_id) + assoc_b = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_b = assoc_b['rx-key']['spi'] >> 31 + s.close() + ksft_ne(top_a, top_b) + + +def assoc_basic(cfg): + """ Test creating associations """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + assoc = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(assoc), 0) + s.close() + + +def assoc_bad_dev(cfg): + """ Test creating associations with bad device ID """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV) + + +def assoc_sk_only_conn(cfg): + """ Test creating associations based on socket """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + _close_conn(cfg, s) + + +def assoc_sk_only_mismatch(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_sk_only_mismatch_tx(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_sk_only_unconn(cfg): + """ Test creating associations based on socket (unconnected, should fail) """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['miss-type'], "dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_version_mismatch(cfg): + """ Test creating associations where Rx and Tx PSP versions do not match """ + _init_psp_dev(cfg) + + versions = list(cfg.psp_info['psp-versions-cap']) + if len(versions) < 2: + raise KsftSkipEx("Not enough PSP versions supported by the device for the test") + + # Translate versions to integers + versions = [cfg.pspnl.consts["version"].entries[v].value for v in versions] + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + rx = cfg.pspnl.rx_assoc({"version": versions[0], + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + + for version in versions[1:]: + with ksft_raises(NlError) as cm: + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": rx['rx-key'], + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_twice(cfg): + """ Test reusing Tx assoc for two sockets """ + _init_psp_dev(cfg) + + def rx_assoc_check(s): + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + return assoc + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = rx_assoc_check(s) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(tx), 0) + + # Use the same Tx assoc second time + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s2: + rx_assoc_check(s2) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s2.fileno()}) + ksft_eq(len(tx), 0) + + s.close() + + +def _data_basic_send(cfg, version, ipver): + """ Test basic data send """ + _init_psp_dev(cfg) + + # Version 0 is required by spec, don't let it skip + if version: + name = cfg.pspnl.consts["version"].entries_by_val[version].name + if name not in cfg.psp_info['psp-versions-cap']: + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) + raise KsftSkipEx("PSP version not supported", name) + + s = _make_psp_conn(cfg, version, ipver) + + rx_assoc = cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _close_psp_conn(cfg, s) + + +def __bad_xfer_do(cfg, s, tx, version='hdr0-aes-gcm-128'): + # Make sure we accept the ACK for the SPI before we seal with the bad assoc + _check_data_outq(s, 0) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 20) + _check_data_outq(s, data_len, force_wait=True) + _check_data_rx(cfg, 0) + _close_psp_conn(cfg, s) + + +def data_send_bad_key(cfg): + """ Test send data with bad key """ + _init_psp_dev(cfg) + + s = _make_psp_conn(cfg) + + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + tx['key'] = (tx['key'][0] ^ 0xff).to_bytes(1, 'little') + tx['key'][1:] + __bad_xfer_do(cfg, s, tx) + + +def data_send_disconnect(cfg): + """ Test socket close after sending data """ + _init_psp_dev(cfg) + + with _make_psp_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + tx = _spi_xchg(s, assoc['rx-key']) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + + s.shutdown(socket.SHUT_RDWR) + s.close() + + +def _data_mss_adjust(cfg, ipver): + _init_psp_dev(cfg) + + # First figure out what the MSS would be without any adjustments + s = _make_clr_conn(cfg, ipver) + s.send(b"0123456789abcdef" * 1024) + _check_data_rx(cfg, 16 * 1024) + mss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + _close_conn(cfg, s) + + s = _make_psp_conn(cfg, 0, ipver) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + rxmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, rxmss) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + finally: + _close_psp_conn(cfg, s) + + +def data_stale_key(cfg): + """ Test send on a double-rotated key """ + _init_psp_dev(cfg) + + s = _make_psp_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + + s.send(b'0123456789' * 200) + _check_data_outq(s, 2000, force_wait=True) + finally: + _close_psp_conn(cfg, s) + + +def __nsim_psp_rereg(cfg): + # The PSP dev ID will change, remember what was there before + before = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + cfg._ns.nsims[0].dfs_write('psp_rereg', '1') + + after = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + new_devs = list(after - before) + ksft_eq(len(new_devs), 1) + cfg.psp_dev_id = list(after - before)[0] + + +def removal_device_rx(cfg): + """ Test removing a netdev / PSD with active Rx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_not_none(rx_assoc) + + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def removal_device_bi(cfg): + """ Test removing a netdev / PSD with active Rx/Tx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": rx_assoc['rx-key'], + "sock-fd": s.fileno()}) + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver): + """Build test cases for each combo of PSP version and IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}" + test_func(cfg, psp_ver, ipver) + return test_case + + +def ipver_test_builder(name, test_func, ipver): + """Build test cases for each IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_case.__name__ = f"{name}_ip{ipver}" + test_func(cfg, ipver) + return test_case + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.pspnl = PSPFamily() + + # Set up responder and communication sock + responder = cfg.remote.deploy("psp_responder") + + cfg.comm_port = rand_port() + srv = None + try: + with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote, + exit_wait=True) as srv: + wait_port_listen(cfg.comm_port, host=cfg.remote) + + cfg.comm_sock = socket.create_connection((cfg.remote_addr, + cfg.comm_port), + timeout=1) + + cases = [ + psp_ip_ver_test_builder( + "data_basic_send", _data_basic_send, version, ipver + ) + for version in range(0, 4) + for ipver in ("4", "6") + ] + cases += [ + ipver_test_builder("data_mss_adjust", _data_mss_adjust, ipver) + for ipver in ("4", "6") + ] + + ksft_run(cases=cases, globs=globals(), + case_pfx={"dev_", "data_", "assoc_", "removal_"}, + args=(cfg, )) + + cfg.comm_sock.send(b"exit\0") + cfg.comm_sock.close() + finally: + if srv and (srv.stdout or srv.stderr): + ksft_pr("") + ksft_pr(f"Responder logs ({srv.ret}):") + if srv and srv.stdout: + ksft_pr("STDOUT:\n# " + srv.stdout.strip().replace("\n", "\n# ")) + if srv and srv.stderr: + ksft_pr("STDERR:\n# " + srv.stderr.strip().replace("\n", "\n# ")) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c new file mode 100644 index 000000000000..f309e0d73cbf --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp_responder.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> +#include <sys/poll.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <netinet/in.h> +#include <unistd.h> + +#include <ynl.h> + +#include "psp-user.h" + +#define dbg(msg...) \ +do { \ + if (opts->verbose) \ + fprintf(stderr, "DEBUG: " msg); \ +} while (0) + +static bool should_quit; + +struct opts { + int port; + int devid; + bool verbose; +}; + +enum accept_cfg { + ACCEPT_CFG_NONE = 0, + ACCEPT_CFG_CLEAR, + ACCEPT_CFG_PSP, +}; + +static struct { + unsigned char tx; + unsigned char rx; +} psp_vers; + +static int conn_setup_psp(struct ynl_sock *ys, struct opts *opts, int data_sock) +{ + struct psp_rx_assoc_rsp *rsp; + struct psp_rx_assoc_req *req; + struct psp_tx_assoc_rsp *tsp; + struct psp_tx_assoc_req *teq; + char info[300]; + int key_len; + ssize_t sz; + __u32 spi; + + dbg("create PSP connection\n"); + + // Rx assoc alloc + req = psp_rx_assoc_req_alloc(); + + psp_rx_assoc_req_set_sock_fd(req, data_sock); + psp_rx_assoc_req_set_version(req, psp_vers.rx); + + rsp = psp_rx_assoc(ys, req); + psp_rx_assoc_req_free(req); + + if (!rsp) { + perror("ERROR: failed to Rx assoc"); + return -1; + } + + // SPI exchange + key_len = rsp->rx_key._len.key; + memcpy(info, &rsp->rx_key.spi, sizeof(spi)); + memcpy(&info[sizeof(spi)], rsp->rx_key.key, key_len); + sz = sizeof(spi) + key_len; + + send(data_sock, info, sz, MSG_WAITALL); + psp_rx_assoc_rsp_free(rsp); + + sz = recv(data_sock, info, sz, MSG_WAITALL); + if (sz < 0) { + perror("ERROR: failed to read PSP key from sock"); + return -1; + } + memcpy(&spi, info, sizeof(spi)); + + // Setup Tx assoc + teq = psp_tx_assoc_req_alloc(); + + psp_tx_assoc_req_set_sock_fd(teq, data_sock); + psp_tx_assoc_req_set_version(teq, psp_vers.tx); + psp_tx_assoc_req_set_tx_key_spi(teq, spi); + psp_tx_assoc_req_set_tx_key_key(teq, &info[sizeof(spi)], key_len); + + tsp = psp_tx_assoc(ys, teq); + psp_tx_assoc_req_free(teq); + if (!tsp) { + perror("ERROR: failed to Tx assoc"); + return -1; + } + psp_tx_assoc_rsp_free(tsp); + + return 0; +} + +static void send_ack(int sock) +{ + send(sock, "ack", 4, MSG_WAITALL); +} + +static void send_err(int sock) +{ + send(sock, "err", 4, MSG_WAITALL); +} + +static void send_str(int sock, int value) +{ + char buf[128]; + int ret; + + ret = snprintf(buf, sizeof(buf), "%d", value); + send(sock, buf, ret + 1, MSG_WAITALL); +} + +static void +run_session(struct ynl_sock *ys, struct opts *opts, + int server_sock, int comm_sock) +{ + enum accept_cfg accept_cfg = ACCEPT_CFG_NONE; + struct pollfd pfds[3]; + size_t data_read = 0; + int data_sock = -1; + + while (true) { + bool race_close = false; + int nfds; + + memset(pfds, 0, sizeof(pfds)); + + pfds[0].fd = server_sock; + pfds[0].events = POLLIN; + + pfds[1].fd = comm_sock; + pfds[1].events = POLLIN; + + nfds = 2; + if (data_sock >= 0) { + pfds[2].fd = data_sock; + pfds[2].events = POLLIN; + nfds++; + } + + dbg(" ...\n"); + if (poll(pfds, nfds, -1) < 0) { + perror("poll"); + break; + } + + /* data sock */ + if (pfds[2].revents & POLLIN) { + char buf[8192]; + ssize_t n; + + n = recv(data_sock, buf, sizeof(buf), 0); + if (n <= 0) { + if (n < 0) + perror("data read"); + close(data_sock); + data_sock = -1; + dbg("data sock closed\n"); + } else { + data_read += n; + dbg("data read %zd\n", data_read); + } + } + + /* comm sock */ + if (pfds[1].revents & POLLIN) { + static char buf[4096]; + static ssize_t off; + bool consumed; + ssize_t n; + + n = recv(comm_sock, &buf[off], sizeof(buf) - off, 0); + if (n <= 0) { + if (n < 0) + perror("comm read"); + return; + } + + off += n; + n = off; + +#define __consume(sz) \ + ({ \ + if (n == (sz)) { \ + off = 0; \ + } else { \ + off -= (sz); \ + memmove(buf, &buf[(sz)], off); \ + } \ + }) + +#define cmd(_name) \ + ({ \ + ssize_t sz = sizeof(_name); \ + bool match = n >= sz && !memcmp(buf, _name, sz); \ + \ + if (match) { \ + dbg("command: " _name "\n"); \ + __consume(sz); \ + } \ + consumed |= match; \ + match; \ + }) + + do { + consumed = false; + + if (cmd("read len")) + send_str(comm_sock, data_read); + + if (cmd("data echo")) { + if (data_sock >= 0) + send(data_sock, "echo", 5, + MSG_WAITALL); + else + fprintf(stderr, "WARN: echo but no data sock\n"); + send_ack(comm_sock); + } + if (cmd("data close")) { + if (data_sock >= 0) { + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + race_close = true; + } + } + if (cmd("conn psp")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_PSP; + send_ack(comm_sock); + /* next two bytes are versions */ + if (off >= 2) { + memcpy(&psp_vers, buf, 2); + __consume(2); + } else { + fprintf(stderr, "WARN: short conn psp command!\n"); + } + } + if (cmd("conn clr")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_CLEAR; + send_ack(comm_sock); + } + if (cmd("exit")) + should_quit = true; +#undef cmd + + if (!consumed) { + fprintf(stderr, "WARN: unknown cmd: [%zd] %s\n", + off, buf); + } + } while (consumed && off); + } + + /* server sock */ + if (pfds[0].revents & POLLIN) { + if (data_sock >= 0) { + fprintf(stderr, "WARN: new data sock but old one still here\n"); + close(data_sock); + data_sock = -1; + } + data_sock = accept(server_sock, NULL, NULL); + if (data_sock < 0) { + perror("accept"); + continue; + } + data_read = 0; + + if (accept_cfg == ACCEPT_CFG_CLEAR) { + dbg("new data sock: clear\n"); + /* nothing to do */ + } else if (accept_cfg == ACCEPT_CFG_PSP) { + dbg("new data sock: psp\n"); + conn_setup_psp(ys, opts, data_sock); + } else { + fprintf(stderr, "WARN: new data sock but no config\n"); + } + accept_cfg = ACCEPT_CFG_NONE; + } + + if (race_close) { + if (data_sock >= 0) { + /* indeed, ordering problem, handle the close */ + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + fprintf(stderr, "WARN: close but no data sock\n"); + send_err(comm_sock); + } + } + } + dbg("session ending\n"); +} + +static int spawn_server(struct opts *opts) +{ + struct sockaddr_in6 addr; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd < 0) { + perror("can't open socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = htons(opts->port); + + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) { + perror("can't bind socket"); + return -1; + } + + if (listen(fd, 5)) { + perror("can't listen"); + return -1; + } + + return fd; +} + +static int run_responder(struct ynl_sock *ys, struct opts *opts) +{ + int server_sock, comm; + + server_sock = spawn_server(opts); + if (server_sock < 0) + return 4; + + while (!should_quit) { + comm = accept(server_sock, NULL, NULL); + if (comm < 0) { + perror("accept failed"); + } else { + run_session(ys, opts, server_sock, comm); + close(comm); + } + } + + return 0; +} + +static void usage(const char *name, const char *miss) +{ + if (miss) + fprintf(stderr, "Missing argument: %s\n", miss); + + fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name); + exit(EXIT_FAILURE); +} + +static void parse_cmd_opts(int argc, char **argv, struct opts *opts) +{ + int opt; + + while ((opt = getopt(argc, argv, "vp:d:")) != -1) { + switch (opt) { + case 'v': + opts->verbose = 1; + break; + case 'p': + opts->port = atoi(optarg); + break; + case 'd': + opts->devid = atoi(optarg); + break; + default: + usage(argv[0], NULL); + } + } +} + +static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions) +{ + struct psp_dev_set_req *sreq; + struct psp_dev_set_rsp *srsp; + + fprintf(stderr, "Set PSP enable on device %d to 0x%x\n", + dev_id, versions); + + sreq = psp_dev_set_req_alloc(); + + psp_dev_set_req_set_id(sreq, dev_id); + psp_dev_set_req_set_psp_versions_ena(sreq, versions); + + srsp = psp_dev_set(ys, sreq); + psp_dev_set_req_free(sreq); + if (!srsp) + return 10; + + psp_dev_set_rsp_free(srsp); + return 0; +} + +int main(int argc, char **argv) +{ + struct psp_dev_get_list *dev_list; + bool devid_found = false; + __u32 ver_ena, ver_cap; + struct opts opts = {}; + struct ynl_error yerr; + struct ynl_sock *ys; + int first_id = 0; + int ret; + + parse_cmd_opts(argc, argv, &opts); + if (!opts.port) + usage(argv[0], "port"); // exits + + ys = ynl_sock_create(&ynl_psp_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + dev_list = psp_dev_get_dump(ys); + if (ynl_dump_empty(dev_list)) { + if (ys->err.code) + goto err_close; + fprintf(stderr, "No PSP devices\n"); + goto err_close_silent; + } + + ynl_dump_foreach(dev_list, d) { + if (opts.devid) { + devid_found = true; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else if (!first_id) { + first_id = d->id; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else { + fprintf(stderr, "Multiple PSP devices found\n"); + goto err_close_silent; + } + } + psp_dev_get_list_free(dev_list); + + if (opts.devid && !devid_found) { + fprintf(stderr, "PSP device %d requested on cmdline, not found\n", + opts.devid); + goto err_close_silent; + } else if (!opts.devid) { + opts.devid = first_id; + } + + if (ver_ena != ver_cap) { + ret = psp_dev_set_ena(ys, opts.devid, ver_cap); + if (ret) + goto err_close; + } + + ret = run_responder(ys, &opts); + + if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena)) + fprintf(stderr, "WARN: failed to set the PSP versions back\n"); + + ynl_sock_destroy(ys); + + return ret; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_close_silent: + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index c2bb5d3f1ca1..04d0a2a13e73 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -57,6 +57,36 @@ def check_fec(cfg) -> None: ksft_true(data['stats'], "driver does not report stats") +def check_fec_hist(cfg) -> None: + """ + Check that drivers which support FEC histogram statistics report + reasonable values. + """ + + try: + data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("FEC not supported by the device") from e + raise + if 'stats' not in data: + raise KsftSkipEx("FEC stats not supported by the device") + if 'hist' not in data['stats']: + raise KsftSkipEx("FEC histogram not supported by the device") + + hist = data['stats']['hist'] + for fec_bin in hist: + for key in ['bin-low', 'bin-high', 'bin-val']: + ksft_in(key, fec_bin, + "Drivers should always report FEC bin range and value") + ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'], + "FEC bin range should be valid") + if 'bin-val-per-lane' in fec_bin: + ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'], + "FEC bin value should be equal to sum of per-plane values") + + def pkt_byte_sum(cfg) -> None: """ Check that qstat and interface stats match in value. @@ -279,8 +309,9 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, - check_down, procfs_hammer, procfs_downup_hammer], + ksft_run([check_pause, check_fec, check_fec_hist, pkt_byte_sum, + qstat_by_ifindex, check_down, procfs_hammer, + procfs_downup_hammer], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index eaf6938f100e..89d854c7e674 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -1,11 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -TEST_PROGS := dev_addr_lists.sh propagation.sh +TEST_PROGS := dev_addr_lists.sh propagation.sh options.sh TEST_INCLUDES := \ ../bonding/lag_lib.sh \ ../../../net/forwarding/lib.sh \ - ../../../net/lib.sh + ../../../net/lib.sh \ + ../../../net/in_netns.sh \ + ../../../net/lib/sh/defer.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 636b3525b679..558e1d0cf565 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -3,4 +3,5 @@ CONFIG_IPV6=y CONFIG_MACVLAN=y CONFIG_NETDEVSIM=m CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh new file mode 100755 index 000000000000..44888f32b513 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/options.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify basic set and get functionality of the team +# driver options over netlink. + +# Run in private netns. +test_dir="$(dirname "$0")" +if [[ $# -eq 0 ]]; then + "${test_dir}"/../../../net/in_netns.sh "$0" __subprocess + exit $? +fi + +ALL_TESTS=" + team_test_options +" + +source "${test_dir}/../../../net/lib.sh" + +TEAM_PORT="team0" +MEMBER_PORT="dummy0" + +setup() +{ + ip link add name "${MEMBER_PORT}" type dummy + ip link add name "${TEAM_PORT}" type team +} + +get_and_check_value() +{ + local option_name="$1" + local expected_value="$2" + local port_flag="$3" + + local value_from_get + + if ! value_from_get=$(teamnl "${TEAM_PORT}" getoption "${option_name}" \ + "${port_flag}"); then + echo "Could not get option '${option_name}'" >&2 + return 1 + fi + + if [[ "${value_from_get}" != "${expected_value}" ]]; then + echo "Incorrect value for option '${option_name}'" >&2 + echo "get (${value_from_get}) != set (${expected_value})" >&2 + return 1 + fi +} + +set_and_check_get() +{ + local option_name="$1" + local option_value="$2" + local port_flag="$3" + + local value_from_get + + if ! teamnl "${TEAM_PORT}" setoption "${option_name}" \ + "${option_value}" "${port_flag}"; then + echo "'setoption ${option_name} ${option_value}' failed" >&2 + return 1 + fi + + get_and_check_value "${option_name}" "${option_value}" "${port_flag}" + return $? +} + +# Get a "port flag" to pass to the `teamnl` command. +# E.g. $1="dummy0" -> "port=dummy0", +# $1="" -> "" +get_port_flag() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + echo "--port=${port_name}" + fi +} + +attach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" master "${TEAM_PORT}" + return $? + fi +} + +detach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" nomaster + return $? + fi +} + +# Test that an option's get value matches its set value. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# value_1 - The first value to try setting. +# value_2 - The second value to try setting. +# port_name - The (optional) name of the attached port. +team_test_option() +{ + local option_name="$1" + local value_1="$2" + local value_2="$3" + local possible_values="$2 $3 $2" + local port_name="$4" + local port_flag + + RET=0 + + echo "Setting '${option_name}' to '${value_1}' and '${value_2}'" + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Set and get both possible values. + for value in ${possible_values}; do + set_and_check_get "${option_name}" "${value}" "${port_flag}" + check_err $? "Failed to set '${option_name}' to '${value}'" + done + + detach_port_if_specified "${port_name}" + check_err $? "Couldn't detach ${port_name} from its master" + + log_test "Set + Get '${option_name}' test" +} + +# Test that getting a non-existant option fails. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# port_name - The (optional) name of the attached port. +team_test_get_option_fails() +{ + local option_name="$1" + local port_name="$2" + local port_flag + + RET=0 + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Just confirm that getting the value fails. + teamnl "${TEAM_PORT}" getoption "${option_name}" "${port_flag}" + check_fail $? "Shouldn't be able to get option '${option_name}'" + + detach_port_if_specified "${port_name}" + + log_test "Get '${option_name}' fails" +} + +team_test_options() +{ + # Wrong option name behavior. + team_test_get_option_fails fake_option1 + team_test_get_option_fails fake_option2 "${MEMBER_PORT}" + + # Correct set and get behavior. + team_test_option mode activebackup loadbalance + team_test_option notify_peers_count 0 5 + team_test_option notify_peers_interval 0 5 + team_test_option mcast_rejoin_count 0 5 + team_test_option mcast_rejoin_interval 0 5 + team_test_option enabled true false "${MEMBER_PORT}" + team_test_option user_linkup true false "${MEMBER_PORT}" + team_test_option user_linkup_enabled true false "${MEMBER_PORT}" + team_test_option priority 10 20 "${MEMBER_PORT}" + team_test_option queue_id 0 1 "${MEMBER_PORT}" +} + +require_command teamnl +setup +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index 1dd8bf3bf6c9..08fea4230759 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -112,10 +112,10 @@ def _load_xdp_prog(cfg, bpf_info): defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) cmd( - f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}", + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}", shell=True ) - defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off") xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] prog_info["id"] = xdp_info["xdp"]["prog"]["id"] @@ -290,34 +290,78 @@ def test_xdp_native_drop_mb(cfg): _test_drop(cfg, bpf_info, 8000) -def test_xdp_native_tx_mb(cfg): +def _test_xdp_native_tx(cfg, bpf_info, payload_lens): """ - Tests the XDP_TX action for a multi-buff case. + Tests the XDP_TX action. Args: cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing the BPF program metadata. + payload_lens: Array of packet lengths to send. """ cfg.require_cmd("socat", remote=True) - - bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) prog_info = _load_xdp_prog(cfg, bpf_info) port = rand_port() _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) - test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000)) - rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" - tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + expected_pkts = 0 + for payload_len in payload_lens: + test_string = "".join( + random.choice(string.ascii_lowercase) for _ in range(payload_len) + ) + + rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \ + f"-u UDP-RECV:{port},reuseport STDOUT" + + # Writing zero bytes to stdin gets ignored by socat, + # but with the shut-null flag socat generates a zero sized packet + # when the socket is closed. + tx_cmd_suffix = ",shut-null" if payload_len == 0 else "" + tx_udp = f"echo -n {test_string} | socat -t 2 " + \ + f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) + cmd(tx_udp, host=cfg.remote, shell=True) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + + expected_pkts += 1 + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch") + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch") + + +def test_xdp_native_tx_sb(cfg): + """ + Tests the XDP_TX action for a single-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + # Ensure there's enough room for an ETH / IP / UDP header + pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62 - with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: - wait_port_listen(port, proto="udp", host=cfg.remote) - cmd(tx_udp, host=cfg.remote, shell=True) + _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len]) - stats = _get_stats(prog_info['maps']['map_xdp_stats']) - ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") - ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch") +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", + "xdp.frags", 9000) + # The first packet ensures we exercise the fragmented code path. + # And the subsequent 0-sized packet ensures the driver + # reinitializes xdp_buff correctly. + _test_xdp_native_tx(cfg, bpf_info, [8000, 0]) def _validate_res(res, offset_lst, pkt_sz_lst): @@ -644,6 +688,7 @@ def main(): test_xdp_native_pass_mb, test_xdp_native_drop_sb, test_xdp_native_drop_mb, + test_xdp_native_tx_sb, test_xdp_native_tx_mb, test_xdp_native_adjst_tail_grow_data, test_xdp_native_adjst_tail_shrnk_data, diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore new file mode 100644 index 000000000000..3e72e742d08e --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +fuse_mnt +fusectl_test diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile new file mode 100644 index 000000000000..612aad69a93a --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) + +TEST_GEN_PROGS := fusectl_test +TEST_GEN_FILES := fuse_mnt + +include ../../lib.mk + +VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse +endif + +VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS := -lfuse -pthread +endif + +$(OUTPUT)/fuse_mnt: CFLAGS += $(VAR_CFLAGS) +$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS) diff --git a/tools/testing/selftests/filesystems/fuse/fuse_mnt.c b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c new file mode 100644 index 000000000000..d12b17f30fad --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fusectl test file-system + * Creates a simple FUSE filesystem with a single read-write file (/test) + */ + +#define FUSE_USE_VERSION 26 + +#include <fuse.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static char *content; +static size_t content_size = 0; +static const char test_path[] = "/test"; + +static int test_getattr(const char *path, struct stat *st) +{ + memset(st, 0, sizeof(*st)); + + if (!strcmp(path, "/")) { + st->st_mode = S_IFDIR | 0755; + st->st_nlink = 2; + return 0; + } + + if (!strcmp(path, test_path)) { + st->st_mode = S_IFREG | 0664; + st->st_nlink = 1; + st->st_size = content_size; + return 0; + } + + return -ENOENT; +} + +static int test_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi) +{ + if (strcmp(path, "/")) + return -ENOENT; + + filler(buf, ".", NULL, 0); + filler(buf, "..", NULL, 0); + filler(buf, test_path + 1, NULL, 0); + + return 0; +} + +static int test_open(const char *path, struct fuse_file_info *fi) +{ + if (strcmp(path, test_path)) + return -ENOENT; + + return 0; +} + +static int test_read(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (!content || content_size == 0) + return 0; + + if (offset >= content_size) + return 0; + + if (offset + size > content_size) + size = content_size - offset; + + memcpy(buf, content + offset, size); + + return size; +} + +static int test_write(const char *path, const char *buf, size_t size, + off_t offset, struct fuse_file_info *fi) +{ + size_t new_size; + + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if(offset > content_size) + return -EINVAL; + + new_size = MAX(offset + size, content_size); + + if (new_size > content_size) + content = realloc(content, new_size); + + content_size = new_size; + + if (!content) + return -ENOMEM; + + memcpy(content + offset, buf, size); + + return size; +} + +static int test_truncate(const char *path, off_t size) +{ + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (size == 0) { + free(content); + content = NULL; + content_size = 0; + return 0; + } + + content = realloc(content, size); + + if (!content) + return -ENOMEM; + + if (size > content_size) + memset(content + content_size, 0, size - content_size); + + content_size = size; + return 0; +} + +static struct fuse_operations memfd_ops = { + .getattr = test_getattr, + .readdir = test_readdir, + .open = test_open, + .read = test_read, + .write = test_write, + .truncate = test_truncate, +}; + +int main(int argc, char *argv[]) +{ + return fuse_main(argc, argv, &memfd_ops, NULL); +} diff --git a/tools/testing/selftests/filesystems/fuse/fusectl_test.c b/tools/testing/selftests/filesystems/fuse/fusectl_test.c new file mode 100644 index 000000000000..8d124d1cacb2 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fusectl_test.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Chen Linxuan <chenlinxuan@uniontech.com> + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <dirent.h> +#include <sched.h> +#include <linux/limits.h> + +#include "../../kselftest_harness.h" + +#define FUSECTL_MOUNTPOINT "/sys/fs/fuse/connections" +#define FUSE_MOUNTPOINT "/tmp/fuse_mnt_XXXXXX" +#define FUSE_DEVICE "/dev/fuse" +#define FUSECTL_TEST_VALUE "1" + +static void write_file(struct __test_metadata *const _metadata, + const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + + ASSERT_GE(fd, 0); + ASSERT_EQ(write(fd, val, len), len); + ASSERT_EQ(close(fd), 0); +} + +FIXTURE(fusectl){ + char fuse_mountpoint[sizeof(FUSE_MOUNTPOINT)]; + int connection; +}; + +FIXTURE_SETUP(fusectl) +{ + const char *fuse_mnt_prog = "./fuse_mnt"; + int status, pid; + struct stat statbuf; + uid_t uid = getuid(); + gid_t gid = getgid(); + char buf[32]; + + /* Setup userns */ + ASSERT_EQ(unshare(CLONE_NEWNS|CLONE_NEWUSER), 0); + sprintf(buf, "0 %d 1", uid); + write_file(_metadata, "/proc/self/uid_map", buf); + write_file(_metadata, "/proc/self/setgroups", "deny"); + sprintf(buf, "0 %d 1", gid); + write_file(_metadata, "/proc/self/gid_map", buf); + ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0); + + strcpy(self->fuse_mountpoint, FUSE_MOUNTPOINT); + + if (!mkdtemp(self->fuse_mountpoint)) + SKIP(return, + "Failed to create FUSE mountpoint %s", + strerror(errno)); + + if (access(FUSECTL_MOUNTPOINT, F_OK)) + SKIP(return, + "FUSE control filesystem not mounted"); + + pid = fork(); + if (pid < 0) + SKIP(return, + "Failed to fork FUSE daemon process: %s", + strerror(errno)); + + if (pid == 0) { + execlp(fuse_mnt_prog, fuse_mnt_prog, self->fuse_mountpoint, NULL); + exit(errno); + } + + waitpid(pid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + SKIP(return, + "Failed to start FUSE daemon %s", + strerror(WEXITSTATUS(status))); + } + + if (stat(self->fuse_mountpoint, &statbuf)) + SKIP(return, + "Failed to stat FUSE mountpoint %s", + strerror(errno)); + + self->connection = statbuf.st_dev; +} + +FIXTURE_TEARDOWN(fusectl) +{ + umount2(self->fuse_mountpoint, MNT_DETACH); + rmdir(self->fuse_mountpoint); +} + +TEST_F(fusectl, abort) +{ + char path_buf[PATH_MAX]; + int abort_fd, test_fd, ret; + + sprintf(path_buf, "/sys/fs/fuse/connections/%d/abort", self->connection); + + ASSERT_EQ(0, access(path_buf, F_OK)); + + abort_fd = open(path_buf, O_WRONLY); + ASSERT_GE(abort_fd, 0); + + sprintf(path_buf, "%s/test", self->fuse_mountpoint); + + test_fd = open(path_buf, O_RDWR); + ASSERT_GE(test_fd, 0); + + ret = read(test_fd, path_buf, sizeof(path_buf)); + ASSERT_EQ(ret, 0); + + ret = write(test_fd, "test", sizeof("test")); + ASSERT_EQ(ret, sizeof("test")); + + ret = lseek(test_fd, 0, SEEK_SET); + ASSERT_GE(ret, 0); + + ret = write(abort_fd, FUSECTL_TEST_VALUE, sizeof(FUSECTL_TEST_VALUE)); + ASSERT_GT(ret, 0); + + close(abort_fd); + + ret = read(test_fd, path_buf, sizeof(path_buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOTCONN); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 3c3e08b8c90e..772ca1db6e59 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -1042,15 +1042,13 @@ static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents) .dev_id = dev_id, }, }; - int ret; while (nvevents--) { - ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), - &trigger_vevent_cmd); - if (ret < 0) + if (!ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), + &trigger_vevent_cmd)) return -1; } - return ret; + return 0; } #define test_cmd_trigger_vevents(dev_id, nvevents) \ diff --git a/tools/testing/selftests/kexec/.gitignore b/tools/testing/selftests/kexec/.gitignore new file mode 100644 index 000000000000..5f3d9e089ae8 --- /dev/null +++ b/tools/testing/selftests/kexec/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +test_kexec_jump diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c index 8034e24c6bf6..6d9e91d55d68 100644 --- a/tools/testing/selftests/kho/init.c +++ b/tools/testing/selftests/kho/init.c @@ -1,22 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -#ifndef NOLIBC -#include <errno.h> #include <stdio.h> #include <unistd.h> #include <fcntl.h> -#include <syscall.h> +#include <sys/syscall.h> #include <sys/mount.h> #include <sys/reboot.h> -#endif +#include <linux/kexec.h> /* from arch/x86/include/asm/setup.h */ #define COMMAND_LINE_SIZE 2048 -/* from include/linux/kexex.h */ -#define KEXEC_FILE_NO_INITRAMFS 0x00000004 - -#define KHO_FINILIZE "/debugfs/kho/out/finalize" +#define KHO_FINALIZE "/debugfs/kho/out/finalize" #define KERNEL_IMAGE "/kernel" static int mount_filesystems(void) @@ -32,7 +27,7 @@ static int kho_enable(void) const char enable[] = "1"; int fd; - fd = open(KHO_FINILIZE, O_RDWR); + fd = open(KHO_FINALIZE, O_RDWR); if (fd < 0) return -1; diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh index ec70a17bd476..3f6c17166846 100755 --- a/tools/testing/selftests/kho/vmtest.sh +++ b/tools/testing/selftests/kho/vmtest.sh @@ -10,7 +10,6 @@ kernel_dir=$(realpath "$test_dir/../../../..") tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX) headers_dir="$tmp_dir/usr" -initrd_dir="$tmp_dir/initrd" initrd="$tmp_dir/initrd.cpio" source "$test_dir/../kselftest/ktap_helpers.sh" @@ -81,19 +80,22 @@ EOF function mkinitrd() { local kernel=$1 - mkdir -p "$initrd_dir"/{dev,debugfs,proc} - sudo mknod "$initrd_dir/dev/console" c 5 1 - - "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -I"$headers_dir/include" \ - -fno-asynchronous-unwind-tables -fno-ident -nostdlib \ - -include "$test_dir/../../../include/nolibc/nolibc.h" \ - -o "$initrd_dir/init" "$test_dir/init.c" \ - - cp "$kernel" "$initrd_dir/kernel" + "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -nostdlib \ + -fno-asynchronous-unwind-tables -fno-ident \ + -I "$headers_dir/include" \ + -I "$kernel_dir/tools/include/nolibc" \ + -o "$tmp_dir/init" "$test_dir/init.c" + + cat > "$tmp_dir/cpio_list" <<EOF +dir /dev 0755 0 0 +dir /proc 0755 0 0 +dir /debugfs 0755 0 0 +nod /dev/console 0600 0 0 c 5 1 +file /init $tmp_dir/init 0755 0 0 +file /kernel $kernel 0644 0 0 +EOF - pushd "$initrd_dir" &>/dev/null - find . | cpio -H newc --create > "$initrd" 2>/dev/null - popd &>/dev/null + "$build_dir/usr/gen_init_cpio" "$tmp_dir/cpio_list" > "$initrd" } function run_qemu() { diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 8deeb4b72e73..afbcf8412ae5 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -93,6 +93,14 @@ #endif #define __printf(a, b) __attribute__((format(printf, a, b))) +#ifndef __always_unused +#define __always_unused __attribute__((__unused__)) +#endif + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + /* counters */ struct ksft_count { unsigned int ksft_pass; diff --git a/tools/testing/selftests/kselftest_harness/Makefile b/tools/testing/selftests/kselftest_harness/Makefile index 0617535a6ce4..d2369c01701a 100644 --- a/tools/testing/selftests/kselftest_harness/Makefile +++ b/tools/testing/selftests/kselftest_harness/Makefile @@ -2,6 +2,7 @@ TEST_GEN_PROGS_EXTENDED := harness-selftest TEST_PROGS := harness-selftest.sh +TEST_FILES := harness-selftest.expected EXTRA_CLEAN := harness-selftest.seen include ../lib.mk diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 41b40c676d7f..8926ff6808cf 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -156,6 +156,7 @@ TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/hello_el2 TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls TEST_GEN_PROGS_arm64 += arm64/external_aborts @@ -175,6 +176,7 @@ TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test TEST_GEN_PROGS_arm64 += get-reg-list +TEST_GEN_PROGS_arm64 += guest_memfd_test TEST_GEN_PROGS_arm64 += memslot_modification_stress_test TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test @@ -196,9 +198,15 @@ TEST_GEN_PROGS_s390 += rseq_test TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test +TEST_GEN_PROGS_riscv += access_tracking_perf_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += coalesced_io_test +TEST_GEN_PROGS_riscv += dirty_log_perf_test TEST_GEN_PROGS_riscv += get-reg-list +TEST_GEN_PROGS_riscv += memslot_modification_stress_test +TEST_GEN_PROGS_riscv += memslot_perf_test +TEST_GEN_PROGS_riscv += mmu_stress_test +TEST_GEN_PROGS_riscv += rseq_test TEST_GEN_PROGS_riscv += steal_time TEST_GEN_PROGS_loongarch += coalesced_io_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index c9de66537ec3..b058f27b2141 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -50,6 +50,7 @@ #include "memstress.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" #include "cgroup_util.h" #include "lru_gen_util.h" diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c index eeba1cc87ff8..d592a4515399 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c @@ -165,10 +165,8 @@ static void guest_code(void) static void test_init_timer_irq(struct kvm_vm *vm) { /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]); + vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]); sync_global_to_guest(vm, ptimer_irq); sync_global_to_guest(vm, vtimer_irq); @@ -176,14 +174,14 @@ static void test_init_timer_irq(struct kvm_vm *vm) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } -static int gic_fd; - struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; int nr_vcpus = test_args.nr_vcpus; + TEST_REQUIRE(kvm_supports_vgic_v3()); + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); vm_init_descriptor_tables(vm); @@ -204,8 +202,6 @@ struct kvm_vm *test_vm_create(void) vcpu_init_descriptor_tables(vcpus[i]); test_init_timer_irq(vm); - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); /* Make all the test's cmdline args visible to the guest */ sync_global_to_guest(vm, test_args); @@ -215,6 +211,5 @@ struct kvm_vm *test_vm_create(void) void test_vm_cleanup(struct kvm_vm *vm) { - close(gic_fd); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index ce74d069cb7b..91906414a474 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -924,10 +924,8 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + ptimer_irq = vcpu_get_ptimer_irq(vcpu); + vtimer_irq = vcpu_get_vtimer_irq(vcpu); sync_global_to_guest(vm, ptimer_irq); sync_global_to_guest(vm, vtimer_irq); @@ -935,8 +933,6 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } -static int gic_fd; - static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, enum arch_timer timer) { @@ -951,8 +947,6 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, vcpu_args_set(*vcpu, 1, timer); test_init_timer_irq(*vm, *vcpu); - gic_fd = vgic_v3_setup(*vm, 1, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); sync_global_to_guest(*vm, test_args); sync_global_to_guest(*vm, CVAL_MAX); @@ -961,7 +955,6 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, static void test_vm_cleanup(struct kvm_vm *vm) { - close(gic_fd); kvm_vm_free(vm); } @@ -1042,6 +1035,8 @@ int main(int argc, char *argv[]) /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); + TEST_REQUIRE(kvm_supports_vgic_v3()); + if (!parse_args(argc, argv)) exit(KSFT_SKIP); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c index 062bf84cced1..592b26ded779 100644 --- a/tools/testing/selftests/kvm/arm64/external_aborts.c +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -250,6 +250,47 @@ static void test_serror(void) kvm_vm_free(vm); } +static void expect_sea_s1ptw_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3)); + + GUEST_DONE(); +} + +static noinline void test_s1ptw_abort_guest(void) +{ + extern char test_s1ptw_abort_insn; + + WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn); + + asm volatile("test_s1ptw_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("Load on S1PTW abort should not retire"); +} + +static void test_s1ptw_abort(void) +{ + struct kvm_vcpu *vcpu; + u64 *ptep, bad_pa; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest, + expect_sea_s1ptw_handler); + + ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2); + bad_pa = BIT(vm->pa_bits) - vm->page_size; + + *ptep &= ~GENMASK(47, 12); + *ptep |= bad_pa; + + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + static void test_serror_emulated_guest(void) { GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); @@ -327,4 +368,5 @@ int main(void) test_serror_masked(); test_serror_emulated(); test_mmio_ease(); + test_s1ptw_abort(); } diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c new file mode 100644 index 000000000000..bbe6862c6ab1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/hello_el2.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1 + * + * Copyright 2025 Google LLC + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include <asm/sysreg.h> + +static void guest_code(void) +{ + u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1); + u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1); + u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4); + + GUEST_ASSERT_EQ(get_current_el(), 2); + GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H); + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1), + ID_AA64MMFR1_EL1_VH_IMP); + + /* + * Traps of the complete ID register space are IMPDEF without FEAT_FGT, + * which is really annoying to deal with in KVM describing E2H as RES1. + * + * If the implementation doesn't honor the trap then expect the register + * to return all zeros. + */ + if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP) + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0), + ID_AA64MMFR0_EL1_FGT_NI); + else + GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1); + + GUEST_DONE(); +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index 44cfcf8a7f46..bf038a0371f4 100644 --- a/tools/testing/selftests/kvm/arm64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -108,7 +108,7 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info) for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { memset(&res, 0, sizeof(res)); - smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); + do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); switch (stage) { case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c index af9581b860f1..b5be9133535a 100644 --- a/tools/testing/selftests/kvm/arm64/kvm-uuid.c +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -25,7 +25,7 @@ static void guest_code(void) { struct arm_smccc_res res = {}; - smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c index f222538e6084..152c34776981 100644 --- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -163,6 +163,8 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; uint64_t pfr0; + test_disable_default_vgic(); + vm = vm_create_with_one_vcpu(&vcpu, NULL); pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0), diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index ab491ee9e5f7..98e49f710aef 100644 --- a/tools/testing/selftests/kvm/arm64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -27,7 +27,7 @@ static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, + do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, 0, 0, 0, 0, &res); return res.a0; @@ -38,7 +38,7 @@ static uint64_t psci_affinity_info(uint64_t target_affinity, { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, + do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, 0, 0, 0, 0, 0, &res); return res.a0; @@ -48,7 +48,7 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, + do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, 0, 0, 0, 0, 0, &res); return res.a0; @@ -58,7 +58,7 @@ static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -67,7 +67,7 @@ static uint64_t psci_features(uint32_t func_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); + do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -89,12 +89,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, vm = vm_create(2); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *source = aarch64_vcpu_add(vm, 0, &init, guest_code); *target = aarch64_vcpu_add(vm, 1, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 189321e96925..8ff1e853f7f8 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -15,8 +15,6 @@ #include "test_util.h" #include <linux/bitfield.h> -bool have_cap_arm_mte; - enum ftr_type { FTR_EXACT, /* Use a predefined safe value */ FTR_LOWER_SAFE, /* Smaller value is safe */ @@ -125,6 +123,13 @@ static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), @@ -165,7 +170,9 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0), REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), @@ -221,6 +228,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), + TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1), TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), @@ -239,6 +247,7 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); @@ -568,7 +577,9 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) uint64_t mte_frac; int idx, err; - if (!have_cap_arm_mte) { + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); + if (!mte) { ksft_test_result_skip("MTE capability not supported, nothing to test\n"); return; } @@ -593,9 +604,6 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) * from unsupported (0xF) to supported (0). * */ - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - - mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { @@ -750,28 +758,23 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) ksft_test_result_pass("%s\n", __func__); } -void kvm_arch_vm_post_create(struct kvm_vm *vm) -{ - if (vm_check_cap(vm, KVM_CAP_ARM_MTE)) { - vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); - have_cap_arm_mte = true; - } -} - int main(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; bool aarch64_only; uint64_t val, el0; - int test_cnt; + int test_cnt, i, j; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS)); + test_wants_mte(); + vm = vm_create(1); vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0); vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); /* Check for AARCH64 only system */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); @@ -780,13 +783,10 @@ int main(void) ksft_print_header(); - test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + - ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + - ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - - ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; + test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; + for (i = 0; i < ARRAY_SIZE(test_regs); i++) + for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++) + test_cnt++; ksft_set_plan(test_cnt); diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c index 2d189f3da228..1763b9d45400 100644 --- a/tools/testing/selftests/kvm/arm64/smccc_filter.c +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c @@ -22,8 +22,20 @@ enum smccc_conduit { SMC_INSN, }; +static bool test_runs_at_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + struct kvm_vcpu_init init; + + kvm_get_default_vcpu_target(vm, &init); + kvm_vm_free(vm); + + return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + #define for_each_conduit(conduit) \ - for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) + for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \ + conduit <= SMC_INSN; conduit++) static void guest_main(uint32_t func_id, enum smccc_conduit conduit) { @@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) struct kvm_vm *vm; vm = vm_create(1); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); /* * Enable in-kernel emulation of PSCI to ensure that calls are denied @@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); + kvm_arch_vm_finalize_vcpus(vm); return vm; } diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index a8e0f46bc0ab..8d6d3a4ae4db 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -994,6 +994,8 @@ int main(int ac, char **av) int pa_bits; int cnt_impl = 0; + test_disable_default_vgic(); + pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; max_phys_size = 1ULL << pa_bits; diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index a09dd423c2d7..6338f5bbdb70 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -752,7 +752,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) vcpu_args_set(vcpu, 1, args_gva); gic_fd = vgic_v3_setup(vm, 1, nr_irqs); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handlers[args.eoi_split][args.level_sensitive]); @@ -802,6 +801,9 @@ int main(int argc, char **argv) int opt; bool eoi_split = false; + TEST_REQUIRE(kvm_supports_vgic_v3()); + test_disable_default_vgic(); + while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index fc4fe52fb6f8..87922a89b134 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -27,7 +27,7 @@ static vm_paddr_t gpa_base; static struct kvm_vm *vm; static struct kvm_vcpu **vcpus; -static int gic_fd, its_fd; +static int its_fd; static struct test_data { bool request_vcpus_stop; @@ -214,9 +214,6 @@ static void setup_test_data(void) static void setup_gic(void) { - gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); - its_fd = vgic_its_setup(vm); } @@ -355,7 +352,6 @@ static void setup_vm(void) static void destroy_vm(void) { close(its_fd); - close(gic_fd); kvm_vm_free(vm); free(vcpus); } @@ -374,6 +370,8 @@ int main(int argc, char **argv) u32 nr_threads; int c; + TEST_REQUIRE(kvm_supports_vgic_v3()); + while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { switch (c) { case 'v': diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index a0c4ab839155..ae36325c022f 100644 --- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -28,7 +28,6 @@ struct vpmu_vm { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - int gic_fd; }; static struct vpmu_vm vpmu_vm; @@ -45,11 +44,6 @@ static uint64_t get_pmcr_n(uint64_t pmcr) return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); } -static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) -{ - u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); -} - static uint64_t get_counters_mask(uint64_t n) { uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); @@ -415,10 +409,6 @@ static void create_vpmu_vm(void *guest_code) .attr = KVM_ARM_VCPU_PMU_V3_IRQ, .addr = (uint64_t)&irq, }; - struct kvm_device_attr init_attr = { - .group = KVM_ARM_VCPU_PMU_V3_CTRL, - .attr = KVM_ARM_VCPU_PMU_V3_INIT, - }; /* The test creates the vpmu_vm multiple times. Ensure a clean state */ memset(&vpmu_vm, 0, sizeof(vpmu_vm)); @@ -431,13 +421,12 @@ static void create_vpmu_vm(void *guest_code) } /* Create vCPU with PMUv3 */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); vcpu_init_descriptor_tables(vpmu_vm.vcpu); - vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); - __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, - "Failed to create vgic-v3, skipping"); + + kvm_arch_vm_finalize_vcpus(vpmu_vm.vm); /* Make sure that PMUv3 support is indicated in the ID register */ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); @@ -446,14 +435,11 @@ static void create_vpmu_vm(void *guest_code) pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); - /* Initialize vPMU */ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr); - vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr); } static void destroy_vpmu_vm(void) { - close(vpmu_vm.gic_fd); kvm_vm_free(vpmu_vm.vm); } @@ -475,33 +461,28 @@ static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) } } -static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) +static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail) { struct kvm_vcpu *vcpu; - uint64_t pmcr, pmcr_orig; + unsigned int prev; + int ret; create_vpmu_vm(guest_code); vcpu = vpmu_vm.vcpu; - pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); - pmcr = pmcr_orig; + prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0))); - /* - * Setting a larger value of PMCR.N should not modify the field, and - * return a success. - */ - set_pmcr_n(&pmcr, pmcr_n); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); - pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); + ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters); if (expect_fail) - TEST_ASSERT(pmcr_orig == pmcr, - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", - pmcr, pmcr_n); + TEST_ASSERT(ret && errno == EINVAL, + "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded", + nr_counters, prev); else - TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), - "Failed to update PMCR.N to %lu (received: %lu)", - pmcr_n, get_pmcr_n(pmcr)); + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); + + vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL); } /* @@ -516,11 +497,11 @@ static void run_access_test(uint64_t pmcr_n) pr_debug("Test with pmcr_n %lu\n", pmcr_n); - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; /* Save the initial sp to restore them later to run the guest again */ - sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); + sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1)); run_vcpu(vcpu, pmcr_n); @@ -528,11 +509,11 @@ static void run_access_test(uint64_t pmcr_n) * Reset and re-initialize the vCPU, and run the guest code again to * check if PMCR_EL0.N is preserved. */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); aarch64_vcpu_setup(vcpu, &init); vcpu_init_descriptor_tables(vcpu); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp); vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); run_vcpu(vcpu, pmcr_n); @@ -557,7 +538,7 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) uint64_t set_reg_id, clr_reg_id, reg_val; uint64_t valid_counters_mask, max_counters_mask; - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; valid_counters_mask = get_counters_mask(pmcr_n); @@ -611,7 +592,7 @@ static void run_error_test(uint64_t pmcr_n) { pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); - test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); + test_create_vpmu_vm_with_nr_counters(pmcr_n, true); destroy_vpmu_vm(); } @@ -629,11 +610,25 @@ static uint64_t get_pmcr_n_limit(void) return get_pmcr_n(pmcr); } +static bool kvm_supports_nr_counters_attr(void) +{ + bool supported; + + create_vpmu_vm(NULL); + supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS); + destroy_vpmu_vm(); + + return supported; +} + int main(void) { uint64_t i, pmcr_n; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); + TEST_REQUIRE(kvm_supports_vgic_v3()); + TEST_REQUIRE(kvm_supports_nr_counters_attr()); pmcr_n = get_pmcr_n_limit(); for (i = 0; i <= pmcr_n; i++) { diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index e79817bd0e29..0a1ea1d1e2d8 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -20,38 +20,6 @@ #include "guest_modes.h" #include "ucall_common.h" -#ifdef __aarch64__ -#include "arm64/vgic.h" - -static int gic_fd; - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ - /* - * The test can still run even if hardware does not support GICv3, as it - * is only an optimization to reduce guest exits. - */ - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ - if (gic_fd > 0) - close(gic_fd); -} - -#else /* __aarch64__ */ - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ -} - -#endif - /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -166,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, dirty_log_manual_caps); - arch_setup_vm(vm, nr_vcpus); - /* Start the iterations */ iteration = 0; host_quit = false; @@ -285,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) } memstress_free_bitmaps(bitmaps, p->slots); - arch_cleanup_vm(vm); memstress_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 23593d9eeba9..d58a641b0e6a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -585,6 +585,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, log_mode_create_vm_done(vm); *vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index 91f05f78e824..f4644c9d2d3b 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -116,10 +116,13 @@ void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) } #ifdef __aarch64__ -static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init) +static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c, + struct kvm_vcpu_init *init) { struct vcpu_reg_sublist *s; + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init); + for_each_sublist(c, s) if (s->capability) init->features[s->feature / 32] |= 1 << (s->feature % 32); @@ -127,10 +130,10 @@ static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *ini static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) { - struct kvm_vcpu_init init = { .target = -1, }; + struct kvm_vcpu_init init; struct kvm_vcpu *vcpu; - prepare_vcpu_init(c, &init); + prepare_vcpu_init(vm, c, &init); vcpu = __vm_vcpu_add(vm, 0); aarch64_vcpu_setup(vcpu, &init); diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ce687f8d248f..b3ca6737f304 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -13,12 +13,16 @@ #include <linux/bitmap.h> #include <linux/falloc.h> +#include <linux/sizes.h> +#include <setjmp.h> +#include <signal.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> #include "kvm_util.h" #include "test_util.h" +#include "ucall_common.h" static void test_file_read_write(int fd) { @@ -34,12 +38,83 @@ static void test_file_read_write(int fd) "pwrite on a guest_mem fd should fail"); } -static void test_mmap(int fd, size_t page_size) +static void test_mmap_supported(int fd, size_t page_size, size_t total_size) +{ + const char val = 0xaa; + char *mem; + size_t i; + int ret; + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + + memset(mem, val, total_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, + page_size); + TEST_ASSERT(!ret, "fallocate the first page should succeed."); + + for (i = 0; i < page_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00); + for (; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + memset(mem, val, page_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = munmap(mem, total_size); + TEST_ASSERT(!ret, "munmap() should succeed."); +} + +static sigjmp_buf jmpbuf; +void fault_sigbus_handler(int signum) +{ + siglongjmp(jmpbuf, 1); +} + +static void test_fault_overflow(int fd, size_t page_size, size_t total_size) +{ + struct sigaction sa_old, sa_new = { + .sa_handler = fault_sigbus_handler, + }; + size_t map_size = total_size * 4; + const char val = 0xaa; + char *mem; + size_t i; + int ret; + + mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + + sigaction(SIGBUS, &sa_new, &sa_old); + if (sigsetjmp(jmpbuf, 1) == 0) { + memset(mem, 0xaa, map_size); + TEST_ASSERT(false, "memset() should have triggered SIGBUS."); + } + sigaction(SIGBUS, &sa_old, NULL); + + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = munmap(mem, map_size); + TEST_ASSERT(!ret, "munmap() should succeed."); +} + +static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) { char *mem; mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); TEST_ASSERT_EQ(mem, MAP_FAILED); + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); } static void test_file_size(int fd, size_t page_size, size_t total_size) @@ -120,80 +195,187 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) } } -static void test_create_guest_memfd_invalid(struct kvm_vm *vm) +static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, + uint64_t guest_memfd_flags, + size_t page_size) { - size_t page_size = getpagesize(); - uint64_t flag; size_t size; int fd; for (size = 1; size < page_size; size++) { - fd = __vm_create_guest_memfd(vm, size, 0); - TEST_ASSERT(fd == -1 && errno == EINVAL, + fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags); + TEST_ASSERT(fd < 0 && errno == EINVAL, "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL", size); } - - for (flag = BIT(0); flag; flag <<= 1) { - fd = __vm_create_guest_memfd(vm, page_size, flag); - TEST_ASSERT(fd == -1 && errno == EINVAL, - "guest_memfd() with flag '0x%lx' should fail with EINVAL", - flag); - } } static void test_create_guest_memfd_multiple(struct kvm_vm *vm) { int fd1, fd2, ret; struct stat st1, st2; + size_t page_size = getpagesize(); - fd1 = __vm_create_guest_memfd(vm, 4096, 0); + fd1 = __vm_create_guest_memfd(vm, page_size, 0); TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); ret = fstat(fd1, &st1); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size"); + TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size"); - fd2 = __vm_create_guest_memfd(vm, 8192, 0); + fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0); TEST_ASSERT(fd2 != -1, "memfd creation should succeed"); ret = fstat(fd2, &st2); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size"); + TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size"); ret = fstat(fd1, &st1); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size"); + TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size"); TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers"); close(fd2); close(fd1); } -int main(int argc, char *argv[]) +static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags) { - size_t page_size; - size_t total_size; + size_t page_size = getpagesize(); + uint64_t flag; int fd; - struct kvm_vm *vm; - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + for (flag = BIT(0); flag; flag <<= 1) { + fd = __vm_create_guest_memfd(vm, page_size, flag); + if (flag & valid_flags) { + TEST_ASSERT(fd >= 0, + "guest_memfd() with flag '0x%lx' should succeed", + flag); + close(fd); + } else { + TEST_ASSERT(fd < 0 && errno == EINVAL, + "guest_memfd() with flag '0x%lx' should fail with EINVAL", + flag); + } + } +} + +static void test_guest_memfd(unsigned long vm_type) +{ + uint64_t flags = 0; + struct kvm_vm *vm; + size_t total_size; + size_t page_size; + int fd; page_size = getpagesize(); total_size = page_size * 4; - vm = vm_create_barebones(); + vm = vm_create_barebones_type(vm_type); + + if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP)) + flags |= GUEST_MEMFD_FLAG_MMAP; - test_create_guest_memfd_invalid(vm); test_create_guest_memfd_multiple(vm); + test_create_guest_memfd_invalid_sizes(vm, flags, page_size); - fd = vm_create_guest_memfd(vm, total_size, 0); + fd = vm_create_guest_memfd(vm, total_size, flags); test_file_read_write(fd); - test_mmap(fd, page_size); + + if (flags & GUEST_MEMFD_FLAG_MMAP) { + test_mmap_supported(fd, page_size, total_size); + test_fault_overflow(fd, page_size, total_size); + } else { + test_mmap_not_supported(fd, page_size, total_size); + } + test_file_size(fd, page_size, total_size); test_fallocate(fd, page_size, total_size); test_invalid_punch_hole(fd, page_size, total_size); + test_guest_memfd_flags(vm, flags); + close(fd); + kvm_vm_free(vm); +} + +static void guest_code(uint8_t *mem, uint64_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + __GUEST_ASSERT(mem[i] == 0xaa, + "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]); + + memset(mem, 0xff, size); + GUEST_DONE(); +} + +static void test_guest_memfd_guest(void) +{ + /* + * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back + * the guest's code, stack, and page tables, and low memory contains + * the PCI hole and other MMIO regions that need to be avoided. + */ + const uint64_t gpa = SZ_4G; + const int slot = 1; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint8_t *mem; + size_t size; + int fd, i; + + if (!kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP)) + return; + + vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code); + + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP), + "Default VM type should always support guest_memfd mmap()"); + + size = vm->page_size; + fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP); + vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + memset(mem, 0xaa, size); + munmap(mem, size); + + virt_pg_map(vm, gpa, gpa); + vcpu_args_set(vcpu, 2, gpa, size); + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + for (i = 0; i < size; i++) + TEST_ASSERT_EQ(mem[i], 0xff); + + close(fd); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + unsigned long vm_types, vm_type; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + + /* + * Not all architectures support KVM_CAP_VM_TYPES. However, those that + * support guest_memfd have that support for the default VM type. + */ + vm_types = kvm_check_cap(KVM_CAP_VM_TYPES); + if (!vm_types) + vm_types = BIT(VM_TYPE_DEFAULT); + + for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types)) + test_guest_memfd(vm_type); + + test_guest_memfd_guest(); } diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index bf461de34785..e2c4e9f0010f 100644 --- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -155,4 +155,28 @@ static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec) timer_set_tval(timer, msec_to_cycles(msec)); } +static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER : + KVM_ARM_VCPU_TIMER_IRQ_VTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + +static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER : + KVM_ARM_VCPU_TIMER_IRQ_PTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + #endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h index e43a57d99b56..b973bb2c64a6 100644 --- a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h @@ -2,6 +2,9 @@ #ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H -struct kvm_vm_arch {}; +struct kvm_vm_arch { + bool has_gic; + int gic_fd; +}; #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 255fed769a8a..6f481475c135 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -175,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level); uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); static inline void cpu_relax(void) @@ -300,4 +301,77 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, /* Execute a Wait For Interrupt instruction. */ void wfi(void); +void test_wants_mte(void); +void test_disable_default_vgic(void); + +bool vm_supports_el2(struct kvm_vm *vm); +static bool vcpu_has_el2(struct kvm_vcpu *vcpu) +{ + return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + +#define MAPPED_EL2_SYSREG(el2, el1) \ + case SYS_##el1: \ + if (vcpu_has_el2(vcpu)) \ + alias = SYS_##el2; \ + break + + +static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding) +{ + u32 alias = encoding; + + BUILD_BUG_ON(!__builtin_constant_p(encoding)); + + switch (encoding) { + MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1); + MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1); + MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1); + MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1); + MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1); + MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1); + MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1); + MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1); + MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1); + MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1); + MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1); + MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1); + MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1); + MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1); + MAPPED_EL2_SYSREG(POR_EL2, POR_EL1); + MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1); + MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1); + MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1); + MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1); + MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1); + MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1); + MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1); + case SYS_SP_EL1: + if (!vcpu_has_el2(vcpu)) + return ARM64_CORE_REG(sp_el1); + + alias = SYS_SP_EL2; + break; + default: + BUILD_BUG(); + } + + return KVM_ARM64_SYS_REG(alias); +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init); + +static inline unsigned int get_current_el(void) +{ + return (read_sysreg(CurrentEL) >> 2) & 0x3; +} + +#define do_smccc(...) \ +do { \ + if (get_current_el() == 2) \ + smccc_smc(__VA_ARGS__); \ + else \ + smccc_hvc(__VA_ARGS__); \ +} while (0) + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index c481d0c00a5d..688beccc9436 100644 --- a/tools/testing/selftests/kvm/include/arm64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h @@ -16,6 +16,9 @@ ((uint64_t)(flags) << 12) | \ index) +bool kvm_supports_vgic_v3(void); +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); +void __vgic_v3_init(int fd); int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); #define VGIC_MAX_RESERVED 1023 diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 23a506d7eca3..26cc30290e76 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -64,6 +64,9 @@ struct kvm_vcpu { #ifdef __x86_64__ struct kvm_cpuid2 *cpuid; #endif +#ifdef __aarch64__ + struct kvm_vcpu_init init; +#endif struct kvm_binary_stats stats; struct kvm_dirty_gfn *dirty_gfns; uint32_t fetch_index; @@ -260,13 +263,18 @@ int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); -bool get_kvm_param_bool(const char *param); -bool get_kvm_intel_param_bool(const char *param); -bool get_kvm_amd_param_bool(const char *param); +int kvm_get_module_param_integer(const char *module_name, const char *param); +bool kvm_get_module_param_bool(const char *module_name, const char *param); + +static inline bool get_kvm_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm", param); +} -int get_kvm_param_integer(const char *param); -int get_kvm_intel_param_integer(const char *param); -int get_kvm_amd_param_integer(const char *param); +static inline int get_kvm_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm", param); +} unsigned int kvm_check_cap(long cap); @@ -1257,7 +1265,9 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) */ void kvm_selftest_arch_init(void); -void kvm_arch_vm_post_create(struct kvm_vm *vm); +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus); +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm); +void kvm_arch_vm_release(struct kvm_vm *vm); bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 162f303d9daa..e58282488beb 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -9,6 +9,7 @@ #include <linux/stringify.h> #include <asm/csr.h> +#include <asm/vdso/processor.h> #include "kvm_util.h" #define INSN_OPCODE_MASK 0x007c diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h index 3c10c4dc0ae8..72575eadb63a 100644 --- a/tools/testing/selftests/kvm/include/x86/pmu.h +++ b/tools/testing/selftests/kvm/include/x86/pmu.h @@ -5,8 +5,11 @@ #ifndef SELFTEST_KVM_PMU_H #define SELFTEST_KVM_PMU_H +#include <stdbool.h> #include <stdint.h> +#include <linux/bits.h> + #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 /* @@ -61,6 +64,11 @@ #define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00) #define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00) #define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01) +#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02) +#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00) +#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01) +#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02) +#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01) #define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00) #define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) @@ -80,6 +88,11 @@ enum intel_pmu_architectural_events { INTEL_ARCH_BRANCHES_RETIRED_INDEX, INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX, INTEL_ARCH_TOPDOWN_SLOTS_INDEX, + INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX, + INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_RETIRING_INDEX, + INTEL_ARCH_LBR_INSERTS_INDEX, NR_INTEL_ARCH_EVENTS, }; @@ -94,4 +107,17 @@ enum amd_pmu_zen_events { extern const uint64_t intel_pmu_arch_events[]; extern const uint64_t amd_pmu_zen_events[]; +enum pmu_errata { + INSTRUCTIONS_RETIRED_OVERCOUNT, + BRANCHES_RETIRED_OVERCOUNT, +}; +extern uint64_t pmu_errata_mask; + +void kvm_init_pmu_errata(void); + +static inline bool this_pmu_has_errata(enum pmu_errata errata) +{ + return pmu_errata_mask & BIT_ULL(errata); +} + #endif /* SELFTEST_KVM_PMU_H */ diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 2efb05c2f2fb..fbe875eafca5 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -34,6 +34,8 @@ extern uint64_t guest_tsc_khz; #define NMI_VECTOR 0x02 +const char *ex_str(int vector); + #define X86_EFLAGS_FIXED (1u << 1) #define X86_CR4_VME (1ul << 0) @@ -265,7 +267,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) #define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) -#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7) +#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12) #define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) #define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) #define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) @@ -332,6 +334,11 @@ struct kvm_x86_pmu_feature { #define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5) #define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6) #define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7) +#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8) +#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9) +#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10) +#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11) +#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12) #define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0) #define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1) @@ -1179,6 +1186,12 @@ struct idt_entry { void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +/* + * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be + * used to signal "no expcetion". + */ +#define KVM_MAGIC_DE_VECTOR 0xff + /* If a toddler were to say "abracadabra". */ #define KVM_EXCEPTION_MAGIC 0xabacadabaULL @@ -1314,6 +1327,26 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) bool kvm_is_tdp_enabled(void); +static inline bool get_kvm_intel_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_intel", param); +} + +static inline bool get_kvm_amd_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_amd", param); +} + +static inline int get_kvm_intel_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_intel", param); +} + +static inline int get_kvm_amd_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_amd", param); +} + static inline bool kvm_is_pmu_enabled(void) { return get_kvm_param_bool("enable_pmu"); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index eb115123d741..369a4c87dd8f 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -12,6 +12,7 @@ #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" +#include "vgic.h" #include <linux/bitfield.h> #include <linux/sizes.h> @@ -185,7 +186,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) _virt_pg_map(vm, vaddr, paddr, attr_idx); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level) { uint64_t *ptep; @@ -195,17 +196,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 0) + return ptep; switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 1) + break; /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 2) + break; /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; @@ -223,6 +230,11 @@ unmapped_gva: exit(EXIT_FAILURE); } +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return virt_get_pte_hva_at_level(vm, gva, 3); +} + vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep = virt_get_pte_hva(vm, gva); @@ -266,31 +278,49 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } +bool vm_supports_el2(struct kvm_vm *vm) +{ + const char *value = getenv("NV"); + + if (value && *value == '0') + return false; + + return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic; +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init) +{ + struct kvm_vcpu_init preferred = {}; + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); + if (vm_supports_el2(vm)) + preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + + *init = preferred; +} + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; struct kvm_vm *vm = vcpu->vm; uint64_t sctlr_el1, tcr_el1, ttbr0_el1; - if (!init) + if (!init) { + kvm_get_default_vcpu_target(vm, &default_init); init = &default_init; - - if (init->target == -1) { - struct kvm_vcpu_init preferred; - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); - init->target = preferred.target; } vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); + vcpu->init = *init; /* * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 * registers, which the variable argument list macros do. */ - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20); - sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); - tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); + sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { @@ -357,11 +387,17 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) if (use_lpa2_pte_format(vm)) tcr_el1 |= TCR_DS; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); + + if (!vcpu_has_el2(vcpu)) + return; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), + HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H); } void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) @@ -395,7 +431,7 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, aarch64_vcpu_setup(vcpu, init); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size); return vcpu; } @@ -465,7 +501,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { extern char vectors; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -653,3 +689,39 @@ void wfi(void) { asm volatile("wfi"); } + +static bool request_mte; +static bool request_vgic = true; + +void test_wants_mte(void) +{ + request_mte = true; +} + +void test_disable_default_vgic(void) +{ + request_vgic = false; +} + +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE)) + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); + + if (request_vgic && kvm_supports_vgic_v3()) { + vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64); + vm->arch.has_gic = true; + } +} + +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + __vgic_v3_init(vm->arch.gic_fd); +} + +void kvm_arch_vm_release(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + close(vm->arch.gic_fd); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index 4427f43f73ea..d0f7bd0984b8 100644 --- a/tools/testing/selftests/kvm/lib/arm64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -15,6 +15,17 @@ #include "gic.h" #include "gic_v3.h" +bool kvm_supports_vgic_v3(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + kvm_vm_free(vm); + + return !r; +} + /* * vGIC-v3 default host setup * @@ -30,24 +41,11 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) { int gic_fd; uint64_t attr; - struct list_head *iter; - unsigned int nr_gic_pages, nr_vcpus_created = 0; - - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); - - /* - * Make sure that the caller is infact calling this - * function after all the vCPUs are added. - */ - list_for_each(iter, &vm->vcpus) - nr_vcpus_created++; - TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", - nr_vcpus, nr_vcpus_created); + unsigned int nr_gic_pages; /* Distributor setup */ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); @@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - attr = GICD_BASE_GPA; kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); @@ -73,10 +68,39 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + return gic_fd; +} + +void __vgic_v3_init(int fd) +{ + kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); +} - return gic_fd; +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +{ + unsigned int nr_vcpus_created = 0; + struct list_head *iter; + int fd; + + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); + + /* + * Make sure that the caller is infact calling this + * function after all the vCPUs are added. + */ + list_for_each(iter, &vm->vcpus) + nr_vcpus_created++; + TEST_ASSERT(nr_vcpus == nr_vcpus_created, + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", + nr_vcpus, nr_vcpus_created); + + fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs); + if (fd < 0) + return fd; + + __vgic_v3_init(fd); + return fd; } /* should only work for level sensitive interrupts */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index c3f5142b0a54..6743fbd9bd67 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -24,7 +24,7 @@ uint32_t guest_random_seed; struct guest_random_state guest_rng; static uint32_t last_guest_seed; -static int vcpu_mmap_sz(void); +static size_t vcpu_mmap_sz(void); int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { @@ -95,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param, return bytes_read; } -static int get_module_param_integer(const char *module_name, const char *param) +int kvm_get_module_param_integer(const char *module_name, const char *param) { /* * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the @@ -119,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param) return atoi_paranoid(value); } -static bool get_module_param_bool(const char *module_name, const char *param) +bool kvm_get_module_param_bool(const char *module_name, const char *param) { char value; ssize_t r; @@ -135,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param) TEST_FAIL("Unrecognized value '%c' for boolean module param", value); } -bool get_kvm_param_bool(const char *param) -{ - return get_module_param_bool("kvm", param); -} - -bool get_kvm_intel_param_bool(const char *param) -{ - return get_module_param_bool("kvm_intel", param); -} - -bool get_kvm_amd_param_bool(const char *param) -{ - return get_module_param_bool("kvm_amd", param); -} - -int get_kvm_param_integer(const char *param) -{ - return get_module_param_integer("kvm", param); -} - -int get_kvm_intel_param_integer(const char *param) -{ - return get_module_param_integer("kvm_intel", param); -} - -int get_kvm_amd_param_integer(const char *param) -{ - return get_module_param_integer("kvm_amd", param); -} - /* * Capability * @@ -517,7 +487,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, guest_rng = new_guest_random_state(guest_random_seed); sync_global_to_guest(vm, guest_rng); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, nr_runnable_vcpus); return vm; } @@ -555,6 +525,7 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, for (i = 0; i < nr_vcpus; ++i) vcpus[i] = vm_vcpu_add(vm, i, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -805,6 +776,8 @@ void kvm_vm_release(struct kvm_vm *vmp) /* Free cached stats metadata and close FD */ kvm_stats_release(&vmp->stats); + + kvm_arch_vm_release(vmp); } static void __vm_mem_region_delete(struct kvm_vm *vm, @@ -1321,14 +1294,14 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, } /* Returns the size of a vCPU's kvm_run structure. */ -static int vcpu_mmap_sz(void) +static size_t vcpu_mmap_sz(void) { int dev_fd, ret; dev_fd = open_kvm_dev_path_or_exit(); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); - TEST_ASSERT(ret >= sizeof(struct kvm_run), + TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run), KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); close(dev_fd); @@ -1369,7 +1342,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " - "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); @@ -2330,7 +2303,15 @@ void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, TEST_FAIL("Unable to find stat '%s'", name); } -__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ +} + +__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ +} + +__weak void kvm_arch_vm_release(struct kvm_vm *vm) { } diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c index f31f0427c17c..34cb57d1d671 100644 --- a/tools/testing/selftests/kvm/lib/x86/pmu.c +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include "kvm_util.h" +#include "processor.h" #include "pmu.h" const uint64_t intel_pmu_arch_events[] = { @@ -19,6 +20,11 @@ const uint64_t intel_pmu_arch_events[] = { INTEL_ARCH_BRANCHES_RETIRED, INTEL_ARCH_BRANCHES_MISPREDICTED, INTEL_ARCH_TOPDOWN_SLOTS, + INTEL_ARCH_TOPDOWN_BE_BOUND, + INTEL_ARCH_TOPDOWN_BAD_SPEC, + INTEL_ARCH_TOPDOWN_FE_BOUND, + INTEL_ARCH_TOPDOWN_RETIRING, + INTEL_ARCH_LBR_INSERTS, }; kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); @@ -29,3 +35,46 @@ const uint64_t amd_pmu_zen_events[] = { AMD_ZEN_BRANCHES_MISPREDICTED, }; kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); + +/* + * For Intel Atom CPUs, the PMU events "Instruction Retired" or + * "Branch Instruction Retired" may be overcounted for some certain + * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD + * and complex SGX/SMX/CSTATE instructions/flows. + * + * The detailed information can be found in the errata (section SRF7): + * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ + * + * For the Atom platforms before Sierra Forest (including Sierra Forest), + * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would + * be overcounted on these certain instructions, but for Clearwater Forest + * only "Instruction Retired" event is overcounted on these instructions. + */ +static uint64_t get_pmu_errata(void) +{ + if (!this_cpu_is_intel()) + return 0; + + if (this_cpu_family() != 0x6) + return 0; + + switch (this_cpu_model()) { + case 0xDD: /* Clearwater Forest */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT); + case 0xAF: /* Sierra Forest */ + case 0x4D: /* Avaton, Rangely */ + case 0x5F: /* Denverton */ + case 0x86: /* Jacobsville */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) | + BIT_ULL(BRANCHES_RETIRED_OVERCOUNT); + default: + return 0; + } +} + +uint64_t pmu_errata_mask; + +void kvm_init_pmu_errata(void) +{ + pmu_errata_mask = get_pmu_errata(); +} diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index d4c19ac885a9..c748cd9b2eef 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -6,6 +6,7 @@ #include "linux/bitmap.h" #include "test_util.h" #include "kvm_util.h" +#include "pmu.h" #include "processor.h" #include "sev.h" @@ -23,6 +24,39 @@ bool host_cpu_is_intel; bool is_forced_emulation_enabled; uint64_t guest_tsc_khz; +const char *ex_str(int vector) +{ + switch (vector) { +#define VEC_STR(v) case v##_VECTOR: return "#" #v + case DE_VECTOR: return "no exception"; + case KVM_MAGIC_DE_VECTOR: return "#DE"; + VEC_STR(DB); + VEC_STR(NMI); + VEC_STR(BP); + VEC_STR(OF); + VEC_STR(BR); + VEC_STR(UD); + VEC_STR(NM); + VEC_STR(DF); + VEC_STR(TS); + VEC_STR(NP); + VEC_STR(SS); + VEC_STR(GP); + VEC_STR(PF); + VEC_STR(MF); + VEC_STR(AC); + VEC_STR(MC); + VEC_STR(XM); + VEC_STR(VE); + VEC_STR(CP); + VEC_STR(HV); + VEC_STR(VC); + VEC_STR(SX); + default: return "#??"; +#undef VEC_STR + } +} + static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " @@ -557,7 +591,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) return false; if (regs->vector == DE_VECTOR) - return false; + regs->vector = KVM_MAGIC_DE_VECTOR; regs->rip = regs->r11; regs->r9 = regs->vector; @@ -625,7 +659,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) REPORT_GUEST_ASSERT(uc); } -void kvm_arch_vm_post_create(struct kvm_vm *vm) +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) { int r; @@ -638,6 +672,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); sync_global_to_guest(vm, is_forced_emulation_enabled); + sync_global_to_guest(vm, pmu_errata_mask); if (is_sev_vm(vm)) { struct kvm_sev_init init = { 0 }; @@ -1269,6 +1304,8 @@ void kvm_selftest_arch_init(void) host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); + + kvm_init_pmu_errata(); } bool sys_clocksource_is_based_on_tsc(void) diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index c81a84990eab..3cdfa3b19b85 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -22,6 +22,7 @@ #include "processor.h" #include "test_util.h" #include "guest_modes.h" +#include "ucall_common.h" #define DUMMY_MEMSLOT_INDEX 7 diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index e3711beff7f3..5087d082c4b0 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -25,6 +25,7 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <ucall_common.h> #define MEM_EXTRA_SIZE SZ_64K diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index a0b7dabb5040..705ab3d7778b 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -80,9 +80,11 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: @@ -103,6 +105,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB: @@ -128,6 +132,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: return true; @@ -255,6 +260,8 @@ static const char *config_id_to_str(const char *prefix, __u64 id) return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)"; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)"; case KVM_REG_RISCV_CONFIG_REG(mvendorid): return "KVM_REG_RISCV_CONFIG_REG(mvendorid)"; case KVM_REG_RISCV_CONFIG_REG(marchid): @@ -532,9 +539,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZCF), KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), @@ -555,6 +564,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZTSO), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), KVM_ISA_EXT_ARR(ZVFH), KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), @@ -627,6 +638,7 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off) KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), }; @@ -683,6 +695,19 @@ static const char *sbi_sta_id_to_str(__u64 reg_off) return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off); } +static const char *sbi_fwft_id_to_str(__u64 reg_off) +{ + switch (reg_off) { + case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)"; + case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)"; + case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)"; + case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)"; + case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)"; + case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)"; + } + return strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off); +} + static const char *sbi_id_to_str(const char *prefix, __u64 id) { __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE); @@ -695,6 +720,8 @@ static const char *sbi_id_to_str(const char *prefix, __u64 id) switch (reg_subtype) { case KVM_REG_RISCV_SBI_STA: return sbi_sta_id_to_str(reg_off); + case KVM_REG_RISCV_SBI_FWFT: + return sbi_fwft_id_to_str(reg_off); } return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); @@ -780,10 +807,13 @@ void print_reg(const char *prefix, __u64 id) */ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp), @@ -859,11 +889,26 @@ static __u64 sbi_sta_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi), }; +static __u64 sbi_fwft_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value), +}; + static __u64 zicbom_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM, }; +static __u64 zicbop_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP, +}; + static __u64 zicboz_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ, @@ -1010,8 +1055,13 @@ static __u64 vector_regs[] = { #define SUBLIST_SBI_STA \ {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \ .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),} +#define SUBLIST_SBI_FWFT \ + {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \ + .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),} #define SUBLIST_ZICBOM \ {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),} +#define SUBLIST_ZICBOP \ + {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),} #define SUBLIST_ZICBOZ \ {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),} #define SUBLIST_AIA \ @@ -1092,6 +1142,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); +KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); @@ -1127,9 +1178,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); +KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); +KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE); KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); @@ -1150,6 +1203,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO); KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB); KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA); KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH); KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB); @@ -1167,6 +1222,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_pmu, &config_sbi_dbcn, &config_sbi_susp, + &config_sbi_fwft, &config_aia, &config_fp_f, &config_fp_d, @@ -1201,9 +1257,11 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zcf, &config_zcmop, &config_zfa, + &config_zfbfmin, &config_zfh, &config_zfhmin, &config_zicbom, + &config_zicbop, &config_zicboz, &config_ziccrse, &config_zicntr, @@ -1224,6 +1282,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_ztso, &config_zvbb, &config_zvbc, + &config_zvfbfmin, + &config_zvfbfwma, &config_zvfh, &config_zvfhmin, &config_zvkb, diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index 85cc8c18d6e7..e39a724fe860 100644 --- a/tools/testing/selftests/kvm/s390/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -145,7 +145,7 @@ static void finish_vm_setup(struct kvm_vm *vm) slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, 0); } static struct kvm_vm *create_vm_two_memslots(void) diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c index 27255880dabd..aded795d42be 100644 --- a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c @@ -291,7 +291,7 @@ int main(int argc, char *argv[]) ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); free(array); } else { - ksft_test_result_skip("%s feature is not avaialable\n", + ksft_test_result_skip("%s feature is not available\n", testlist[idx].subfunc_name); } } diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index cce2520af720..8edc1fca345b 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -118,7 +118,7 @@ static int64_t smccc(uint32_t func, uint64_t arg) { struct arm_smccc_res res; - smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res); + do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res); return res.a0; } diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c index 2ac89d6c1e46..8926cfe0e209 100644 --- a/tools/testing/selftests/kvm/x86/fastops_test.c +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -8,14 +8,21 @@ * to set RFLAGS.CF based on whether or not the input is even or odd, so that * instructions like ADC and SBB are deterministic. */ +#define fastop(__insn) \ + "bt $0, %[bt_val]\n\t" \ + __insn "\n\t" \ + "pushfq\n\t" \ + "pop %[flags]\n\t" + +#define flags_constraint(flags_val) [flags]"=r"(flags_val) +#define bt_constraint(__bt_val) [bt_val]"rm"((uint32_t)__bt_val) + #define guest_execute_fastop_1(FEP, insn, __val, __flags) \ ({ \ - __asm__ __volatile__("bt $0, %[val]\n\t" \ - FEP insn " %[val]\n\t" \ - "pushfq\n\t" \ - "pop %[flags]\n\t" \ - : [val]"+r"(__val), [flags]"=r"(__flags) \ - : : "cc", "memory"); \ + __asm__ __volatile__(fastop(FEP insn " %[val]") \ + : [val]"+r"(__val), flags_constraint(__flags) \ + : bt_constraint(__val) \ + : "cc", "memory"); \ }) #define guest_test_fastop_1(insn, type_t, __val) \ @@ -36,12 +43,10 @@ #define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \ ({ \ - __asm__ __volatile__("bt $0, %[output]\n\t" \ - FEP insn " %[input], %[output]\n\t" \ - "pushfq\n\t" \ - "pop %[flags]\n\t" \ - : [output]"+r"(__output), [flags]"=r"(__flags) \ - : [input]"r"(__input) : "cc", "memory"); \ + __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : [input]"r"(__input), bt_constraint(__output) \ + : "cc", "memory"); \ }) #define guest_test_fastop_2(insn, type_t, __val1, __val2) \ @@ -63,12 +68,10 @@ #define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \ ({ \ - __asm__ __volatile__("bt $0, %[output]\n\t" \ - FEP insn " %%cl, %[output]\n\t" \ - "pushfq\n\t" \ - "pop %[flags]\n\t" \ - : [output]"+r"(__output), [flags]"=r"(__flags) \ - : "c"(__shift) : "cc", "memory"); \ + __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : "c"(__shift), bt_constraint(__output) \ + : "cc", "memory"); \ }) #define guest_test_fastop_cl(insn, type_t, __val1, __val2) \ @@ -89,6 +92,42 @@ ex_flags, insn, shift, (uint64_t)input, flags); \ }) +#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \ + : "+a"(__a), "+d"(__d), flags_constraint(__flags), \ + KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : [denom]"rm"(__rm), bt_constraint(__rm) \ + : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define guest_test_fastop_div(insn, type_t, __val1, __val2) \ +({ \ + type_t _a = __val1, _d = __val1, rm = __val2; \ + type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \ + uint64_t flags, ex_flags; \ + uint8_t v, ex_v; \ + \ + ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \ + v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \ + \ + GUEST_ASSERT_EQ(v, ex_v); \ + __GUEST_ASSERT(v == ex_v, \ + "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \ + ex_v, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, v); \ + __GUEST_ASSERT(a == ex_a && d == ex_d, \ + "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\ + (uint64_t)ex_a, (uint64_t)ex_d, insn, (uint64_t)_a, \ + (uint64_t)_d, (uint64_t)rm, (uint64_t)a, (uint64_t)d); \ + __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \ + "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, flags);\ +}) + static const uint64_t vals[] = { 0, 1, @@ -115,14 +154,16 @@ do { \ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \ +if (sizeof(type_t) != 1) { \ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \ - guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \ +} \ + guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \ @@ -136,12 +177,15 @@ do { \ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \ } \ } \ } while (0) static void guest_code(void) { + guest_test_fastops(uint8_t, "b"); guest_test_fastops(uint16_t, "w"); guest_test_fastops(uint32_t, "l"); guest_test_fastops(uint64_t, "q"); diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index c863a689aa98..3c21af811d8f 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -45,7 +45,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) TEST_ASSERT((entry->function >= 0x40000000) && (entry->function <= 0x40000082), - "function %x is our of supported range", + "function %x is out of supported range", entry->function); TEST_ASSERT(entry->index == 0, diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 068e9c69710d..99d327084172 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -54,12 +54,12 @@ static void guest_msr(struct msr_data *msr) if (msr->fault_expected) __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); + "Expected #GP on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); else __GUEST_ASSERT(!vector, - "Expected success on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); + "Expected success on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); if (vector || is_write_only_msr(msr->idx)) goto done; @@ -102,12 +102,12 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { __GUEST_ASSERT(vector == UD_VECTOR, - "Expected #UD for control '%lu', got vector '0x%x'", - hcall->control, vector); + "Expected #UD for control '%lu', got %s", + hcall->control, ex_str(vector)); } else { __GUEST_ASSERT(!vector, - "Expected no exception for control '%lu', got vector '0x%x'", - hcall->control, vector); + "Expected no exception for control '%lu', got %s", + hcall->control, ex_str(vector)); GUEST_ASSERT_EQ(res, hcall->expect); } diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 0eb371c62ab8..e45c028d2a7e 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -30,12 +30,12 @@ do { \ \ if (fault_wanted) \ __GUEST_ASSERT((vector) == UD_VECTOR, \ - "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ + "Expected #UD on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ else \ __GUEST_ASSERT(!(vector), \ - "Expected success on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ + "Expected success on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ } while (0) static void guest_monitor_wait(void *arg) diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 8aaaf25b6111..bb215230cc8a 100644 --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -75,6 +75,11 @@ static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, + [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL }, }; kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS); @@ -158,10 +163,18 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr switch (idx) { case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); break; case INTEL_ARCH_BRANCHES_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); break; case INTEL_ARCH_LLC_REFERENCES_INDEX: case INTEL_ARCH_LLC_MISSES_INDEX: @@ -171,9 +184,12 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr fallthrough; case INTEL_ARCH_CPU_CYCLES_INDEX: case INTEL_ARCH_REFERENCE_CYCLES_INDEX: + case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX: + case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX: GUEST_ASSERT_NE(count, 0); break; case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: + case INTEL_ARCH_TOPDOWN_RETIRING_INDEX: __GUEST_ASSERT(count >= NUM_INSNS_RETIRED, "Expected top-down slots >= %u, got count = %lu", NUM_INSNS_RETIRED, count); @@ -311,7 +327,7 @@ static void guest_test_arch_events(void) } static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t length, uint8_t unavailable_mask) + uint8_t length, uint32_t unavailable_mask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -320,6 +336,9 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, if (!pmu_version) return; + unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit, + X86_PROPERTY_PMU_EVENTS_MASK.lo_bit); + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events, pmu_version, perf_capabilities); @@ -344,8 +363,8 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ - "Expected %s on " #insn "(0x%x), got vector %u", \ - expect_gp ? "#GP" : "no fault", msr, vector) \ + "Expected %s on " #insn "(0x%x), got %s", \ + expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \ #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ __GUEST_ASSERT(val == expected, \ @@ -575,6 +594,26 @@ static void test_intel_counters(void) }; /* + * To keep the total runtime reasonable, test only a handful of select, + * semi-arbitrary values for the mask of unavailable PMU events. Test + * 0 (all events available) and all ones (no events available) as well + * as alternating bit sequencues, e.g. to detect if KVM is checking the + * wrong bit(s). + */ + const uint32_t unavailable_masks[] = { + 0x0, + 0xffffffffu, + 0xaaaaaaaau, + 0x55555555u, + 0xf0f0f0f0u, + 0x0f0f0f0fu, + 0xa0a0a0a0u, + 0x0a0a0a0au, + 0x50505050u, + 0x05050505u, + }; + + /* * Test up to PMU v5, which is the current maximum version defined by * Intel, i.e. is the last version that is guaranteed to be backwards * compatible with KVM's existing behavior. @@ -611,16 +650,7 @@ static void test_intel_counters(void) pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n", v, perf_caps[i]); - /* - * To keep the total runtime reasonable, test every - * possible non-zero, non-reserved bitmap combination - * only with the native PMU version and the full bit - * vector length. - */ - if (v == pmu_version) { - for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++) - test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k); - } + /* * Test single bits for all PMU version and lengths up * the number of events +1 (to verify KVM doesn't do @@ -629,11 +659,8 @@ static void test_intel_counters(void) * ones i.e. all events being available and unavailable. */ for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) { - test_arch_events(v, perf_caps[i], j, 0); - test_arch_events(v, perf_caps[i], j, 0xff); - - for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++) - test_arch_events(v, perf_caps[i], j, BIT(k)); + for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++) + test_arch_events(v, perf_caps[i], j, unavailable_masks[k]); } pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n", diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index c15513cd74d1..1c5b7611db24 100644 --- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -214,8 +214,10 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) do { \ uint64_t br = pmc_results.branches_retired; \ uint64_t ir = pmc_results.instructions_retired; \ + bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \ + br >= NUM_BRANCHES : br == NUM_BRANCHES; \ \ - if (br && br != NUM_BRANCHES) \ + if (br && !br_matched) \ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ __func__, br, NUM_BRANCHES); \ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index a1f5ff45d518..7ff6f62e20a3 100644 --- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c @@ -29,7 +29,7 @@ static union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; - u64 anythread_deprecated:1; + u64 pebs_timing_info:1; }; u64 capabilities; } host_cap; @@ -44,6 +44,7 @@ static const union perf_capabilities immutable_caps = { .pebs_arch_reg = 1, .pebs_format = -1, .pebs_baseline = 1, + .pebs_timing_info = 1, }; static const union perf_capabilities format_caps = { @@ -56,8 +57,8 @@ static void guest_test_perf_capabilities_gp(uint64_t val) uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for value '0x%lx', got vector '0x%x'", - val, vector); + "Expected #GP for value '0x%lx', got %s", + val, ex_str(vector)); } static void guest_code(uint64_t current_val) diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index fdebff1165c7..3b4814c55722 100644 --- a/tools/testing/selftests/kvm/x86/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -120,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x) __test_icr(x, icr | i); /* - * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of - * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. + * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use + * vector 0xff. */ icr = APIC_INT_ASSERT | 0xff; for (i = 0; i < 0xff; i++) { diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c index c8a5c5e51661..d038c1571729 100644 --- a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c @@ -81,13 +81,13 @@ static void guest_code(void) vector = xsetbv_safe(0, XFEATURE_MASK_FP); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(FP), got vector '0x%x'", - vector); + "Expected success on XSETBV(FP), got %s", + ex_str(vector)); vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(0x%lx), got vector '0x%x'", - supported_xcr0, vector); + "Expected success on XSETBV(0x%lx), got %s", + supported_xcr0, ex_str(vector)); for (i = 0; i < 64; i++) { if (supported_xcr0 & BIT_ULL(i)) @@ -95,8 +95,8 @@ static void guest_code(void) vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'", - BIT_ULL(i), supported_xcr0, vector); + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s", + BIT_ULL(i), supported_xcr0, ex_str(vector)); } GUEST_DONE(); diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b16986aa6442..02fd1393947a 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -20,14 +20,12 @@ #include <sys/time.h> #include <unistd.h> +#include "../kselftest.h" + #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif -#ifndef __maybe_unused -#define __maybe_unused __attribute__((__unused__)) -#endif - #define REGEX_LANDLOCK_PREFIX "^audit([0-9.:]\\+): domain=\\([0-9a-f]\\+\\)" struct audit_filter { diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 88a3c78f5d98..9acecae36f51 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -22,10 +22,6 @@ #define TMP_DIR "tmp" -#ifndef __maybe_unused -#define __maybe_unused __attribute__((__unused__)) -#endif - /* TEST_F_FORK() should not be used for new tests. */ #define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 530390033929..a448fae57831 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -228,7 +228,10 @@ $(OUTPUT)/%:%.S $(LINK.S) $^ $(LDLIBS) -o $@ endif +# Extract the expected header directory +khdr_output := $(patsubst %/usr/include,%,$(filter %/usr/include,$(KHDR_INCLUDES))) + headers: - $(Q)$(MAKE) -C $(top_srcdir) headers + $(Q)$(MAKE) -f $(top_srcdir)/Makefile -C $(khdr_output) headers .PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index e7b23a8a05fe..c2a8586e51a1 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -58,3 +58,5 @@ pkey_sighandler_tests_32 pkey_sighandler_tests_64 guard-regions merge +prctl_thp_disable +rmap diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index d13b3cef2a2b..eaf9312097f7 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -34,6 +34,7 @@ endif MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +CFLAGS += -Wunreachable-code LDLIBS = -lrt -lpthread -lm # Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is @@ -86,6 +87,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += pfnmap TEST_GEN_FILES += process_madv +TEST_GEN_FILES += prctl_thp_disable TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -101,6 +103,7 @@ TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable TEST_GEN_FILES += guard-regions TEST_GEN_FILES += merge +TEST_GEN_FILES += rmap ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty @@ -228,6 +231,8 @@ $(OUTPUT)/ksm_tests: LDLIBS += -lnuma $(OUTPUT)/migration: LDLIBS += -lnuma +$(OUTPUT)/rmap: LDLIBS += -lnuma + local_config.mk local_config.h: check_config.sh /bin/sh ./check_config.sh $(CC) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index c744c603d688..6560c26f47d1 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -41,11 +41,6 @@ static size_t hugetlbsizes[10]; static int gup_fd; static bool has_huge_zeropage; -static int sz2ord(size_t size) -{ - return __builtin_ctzll(size / pagesize); -} - static int detect_thp_sizes(size_t sizes[], int max) { int count = 0; @@ -57,7 +52,7 @@ static int detect_thp_sizes(size_t sizes[], int max) if (!pmdsize) return 0; - orders = 1UL << sz2ord(pmdsize); + orders = 1UL << sz2ord(pmdsize, pagesize); orders |= thp_supported_orders(); for (i = 0; orders && count < max; i++) { @@ -1216,8 +1211,8 @@ static void run_anon_test_case(struct test_case const *test_case) size_t size = thpsizes[i]; struct thp_settings settings = *thp_current_settings(); - settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; - settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER; + settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS; thp_push_settings(&settings); if (size == pmdsize) { @@ -1868,7 +1863,7 @@ int main(int argc, char **argv) if (pmdsize) { /* Only if THP is supported. */ thp_read_settings(&default_settings); - default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; + default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT; thp_save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c index bdeaac67ff9a..8900b840c17a 100644 --- a/tools/testing/selftests/mm/gup_test.c +++ b/tools/testing/selftests/mm/gup_test.c @@ -139,6 +139,8 @@ int main(int argc, char **argv) break; case 'n': nr_pages = atoi(optarg); + if (nr_pages < 0) + nr_pages = size / psize(); break; case 't': thp = 1; diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 141bf63cbe05..15aadaf24a66 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -2027,11 +2027,10 @@ TEST_F(hmm, hmm_cow_in_device) if (pid == -1) ASSERT_EQ(pid, 0); if (!pid) { - /* Child process waitd for SIGTERM from the parent. */ + /* Child process waits for SIGTERM from the parent. */ while (1) { } - perror("Should not reach this\n"); - exit(0); + /* Should not reach this */ } /* Parent process writes to COW pages(s) and gets a * new copy in system. In case of device private pages, diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index c463d1c09c9b..2bd1dac75c3f 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -65,10 +65,20 @@ static void register_region_with_uffd(char *addr, size_t len) struct uffdio_api uffdio_api; /* Create and enable userfaultfd object. */ - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - if (uffd == -1) - ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno)); + if (uffd == -1) { + switch (errno) { + case EPERM: + ksft_exit_skip("Insufficient permissions, try running as root.\n"); + break; + case ENOSYS: + ksft_exit_skip("userfaultfd is not supported/not enabled.\n"); + break; + default: + ksft_exit_fail_msg("userfaultfd failed with %s\n", strerror(errno)); + break; + } + } uffdio_api.api = UFFD_API; uffdio_api.features = 0; diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index a18c50d51141..3fe7ef04ac62 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -394,7 +394,7 @@ static void *file_setup_area(int nr_hpages) perror("open()"); exit(EXIT_FAILURE); } - p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC, + p = mmap(BASE_ADDR, size, PROT_READ, MAP_PRIVATE, finfo.fd, 0); if (p == MAP_FAILED || p != BASE_ADDR) { perror("mmap()"); diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index d8bd1911dfc0..ac136f04b8d6 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -38,14 +38,11 @@ enum ksm_merge_mode { }; static int mem_fd; -static int ksm_fd; -static int ksm_full_scans_fd; -static int proc_self_ksm_stat_fd; -static int proc_self_ksm_merging_pages_fd; -static int ksm_use_zero_pages_fd; static int pagemap_fd; static size_t pagesize; +static void init_global_file_handles(void); + static bool range_maps_duplicates(char *addr, unsigned long size) { unsigned long offs_a, offs_b, pfn_a, pfn_b; @@ -73,88 +70,6 @@ static bool range_maps_duplicates(char *addr, unsigned long size) return false; } -static long get_my_ksm_zero_pages(void) -{ - char buf[200]; - char *substr_ksm_zero; - size_t value_pos; - ssize_t read_size; - unsigned long my_ksm_zero_pages; - - if (!proc_self_ksm_stat_fd) - return 0; - - read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); - if (read_size < 0) - return -errno; - - buf[read_size] = 0; - - substr_ksm_zero = strstr(buf, "ksm_zero_pages"); - if (!substr_ksm_zero) - return 0; - - value_pos = strcspn(substr_ksm_zero, "0123456789"); - my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10); - - return my_ksm_zero_pages; -} - -static long get_my_merging_pages(void) -{ - char buf[10]; - ssize_t ret; - - if (proc_self_ksm_merging_pages_fd < 0) - return proc_self_ksm_merging_pages_fd; - - ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); - if (ret <= 0) - return -errno; - buf[ret] = 0; - - return strtol(buf, NULL, 10); -} - -static long ksm_get_full_scans(void) -{ - char buf[10]; - ssize_t ret; - - ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); - if (ret <= 0) - return -errno; - buf[ret] = 0; - - return strtol(buf, NULL, 10); -} - -static int ksm_merge(void) -{ - long start_scans, end_scans; - - /* Wait for two full scans such that any possible merging happened. */ - start_scans = ksm_get_full_scans(); - if (start_scans < 0) - return start_scans; - if (write(ksm_fd, "1", 1) != 1) - return -errno; - do { - end_scans = ksm_get_full_scans(); - if (end_scans < 0) - return end_scans; - } while (end_scans < start_scans + 2); - - return 0; -} - -static int ksm_unmerge(void) -{ - if (write(ksm_fd, "2", 1) != 1) - return -errno; - return 0; -} - static char *__mmap_and_merge_range(char val, unsigned long size, int prot, enum ksm_merge_mode mode) { @@ -163,12 +78,12 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, int ret; /* Stabilize accounting by disabling KSM completely. */ - if (ksm_unmerge()) { + if (ksm_stop() < 0) { ksft_print_msg("Disabling (unmerging) KSM failed\n"); return err_map; } - if (get_my_merging_pages() > 0) { + if (ksm_get_self_merging_pages() > 0) { ksft_print_msg("Still pages merged\n"); return err_map; } @@ -218,7 +133,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, } /* Run KSM to trigger merging and wait. */ - if (ksm_merge()) { + if (ksm_start() < 0) { ksft_print_msg("Running KSM failed\n"); goto unmap; } @@ -227,7 +142,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, * Check if anything was merged at all. Ignore the zero page that is * accounted differently (depending on kernel support). */ - if (val && !get_my_merging_pages()) { + if (val && !ksm_get_self_merging_pages()) { ksft_print_msg("No pages got merged\n"); goto unmap; } @@ -274,6 +189,7 @@ static void test_unmerge(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -286,15 +202,12 @@ static void test_unmerge_zero_pages(void) ksft_print_msg("[RUN] %s\n", __func__); - if (proc_self_ksm_stat_fd < 0) { - ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n"); - return; - } - if (ksm_use_zero_pages_fd < 0) { - ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + if (ksm_get_self_zero_pages() < 0) { + ksft_test_result_skip("accessing \"/proc/self/ksm_stat\" failed\n"); return; } - if (write(ksm_use_zero_pages_fd, "1", 1) != 1) { + + if (ksm_use_zero_pages() < 0) { ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); return; } @@ -306,7 +219,7 @@ static void test_unmerge_zero_pages(void) /* Check if ksm_zero_pages is updated correctly after KSM merging */ pages_expected = size / pagesize; - if (pages_expected != get_my_ksm_zero_pages()) { + if (pages_expected != ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after merging\n"); goto unmap; } @@ -319,7 +232,7 @@ static void test_unmerge_zero_pages(void) /* Check if ksm_zero_pages is updated correctly after unmerging */ pages_expected /= 2; - if (pages_expected != get_my_ksm_zero_pages()) { + if (pages_expected != ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n"); goto unmap; } @@ -329,7 +242,7 @@ static void test_unmerge_zero_pages(void) *((unsigned int *)&map[offs]) = offs; /* Now we should have no zeropages remaining. */ - if (get_my_ksm_zero_pages()) { + if (ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n"); goto unmap; } @@ -338,6 +251,7 @@ static void test_unmerge_zero_pages(void) ksft_test_result(!range_maps_duplicates(map, size), "KSM zero pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -366,6 +280,7 @@ static void test_unmerge_discarded(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -452,6 +367,7 @@ static void test_unmerge_uffd_wp(void) close_uffd: close(uffd); unmap: + ksm_stop(); munmap(map, size); } #endif @@ -506,27 +422,30 @@ static int test_child_ksm(void) /* Test if KSM is enabled for the process. */ if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1) - return -1; + return 1; /* Test if merge could really happen. */ map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE); if (map == MAP_MERGE_FAIL) - return -2; + return 2; else if (map == MAP_MERGE_SKIP) - return -3; + return 3; + ksm_stop(); munmap(map, size); return 0; } static void test_child_ksm_err(int status) { - if (status == -1) + if (status == 1) ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n"); - else if (status == -2) + else if (status == 2) ksft_test_result_fail("Merge in child failed\n"); - else if (status == -3) + else if (status == 3) ksft_test_result_skip("Merge in child skipped\n"); + else if (status == 4) + ksft_test_result_fail("Binary not found\n"); } /* Verify that prctl ksm flag is inherited. */ @@ -548,6 +467,7 @@ static void test_prctl_fork(void) child_pid = fork(); if (!child_pid) { + init_global_file_handles(); exit(test_child_ksm()); } else if (child_pid < 0) { ksft_test_result_fail("fork() failed\n"); @@ -595,10 +515,10 @@ static void test_prctl_fork_exec(void) return; } else if (child_pid == 0) { char *prg_name = "./ksm_functional_tests"; - char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME }; + char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME, NULL }; execv(prg_name, argv_for_program); - return; + exit(4); } if (waitpid(child_pid, &status, 0) > 0) { @@ -644,6 +564,7 @@ static void test_prctl_unmerge(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -677,6 +598,47 @@ static void test_prot_none(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); + munmap(map, size); +} + +static void test_fork_ksm_merging_page_count(void) +{ + const unsigned int size = 2 * MiB; + char *map; + pid_t child_pid; + int status; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); + if (map == MAP_FAILED) + return; + + child_pid = fork(); + if (!child_pid) { + init_global_file_handles(); + exit(ksm_get_self_merging_pages()); + } else if (child_pid < 0) { + ksft_test_result_fail("fork() failed\n"); + goto unmap; + } + + if (waitpid(child_pid, &status, 0) < 0) { + ksft_test_result_fail("waitpid() failed\n"); + goto unmap; + } + + status = WEXITSTATUS(status); + if (status) { + ksft_test_result_fail("ksm_merging_page in child: %d\n", status); + goto unmap; + } + + ksft_test_result_pass("ksm_merging_pages is not inherited after fork\n"); + +unmap: + ksm_stop(); munmap(map, size); } @@ -685,24 +647,20 @@ static void init_global_file_handles(void) mem_fd = open("/proc/self/mem", O_RDWR); if (mem_fd < 0) ksft_exit_fail_msg("opening /proc/self/mem failed\n"); - ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); - if (ksm_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); - ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); - if (ksm_full_scans_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + if (ksm_stop() < 0) + ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/run\") failed\n"); + if (ksm_get_full_scans() < 0) + ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/full_scans\") failed\n"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); - proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); - proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); - ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + if (ksm_get_self_merging_pages() < 0) + ksft_exit_skip("accessing \"/proc/self/ksm_merging_pages\") failed\n"); } int main(int argc, char **argv) { - unsigned int tests = 8; + unsigned int tests = 9; int err; if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { @@ -734,6 +692,7 @@ int main(int argc, char **argv) test_prctl_fork(); test_prctl_fork_exec(); test_prctl_unmerge(); + test_fork_ksm_merging_page_count(); err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index b6fabd5c27ed..d8d11bc67ddc 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -264,23 +264,6 @@ static void test_softdirty(void) munmap(addr, SIZE); } -static int system_has_softdirty(void) -{ - /* - * There is no way to check if the kernel supports soft-dirty, other - * than by writing to a page and seeing if the bit was set. But the - * tests are intended to check that the bit gets set when it should, so - * doing that check would turn a potentially legitimate fail into a - * skip. Fortunately, we know for sure that arm64 does not support - * soft-dirty. So for now, let's just use the arch as a corse guide. - */ -#if defined(__aarch64__) - return 0; -#else - return 1; -#endif -} - int main(int argc, char **argv) { int nr_tests = 16; @@ -288,7 +271,7 @@ int main(int argc, char **argv) pagesize = getpagesize(); - if (system_has_softdirty()) + if (softdirty_supported()) nr_tests += 5; ksft_print_header(); @@ -300,7 +283,7 @@ int main(int argc, char **argv) test_holes(); test_populate_read(); test_populate_write(); - if (system_has_softdirty()) + if (softdirty_supported()) test_softdirty(); err = ksft_get_fail_cnt(); diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 5bd52a951cbd..bf2863b102e3 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -846,7 +846,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } @@ -899,7 +899,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } @@ -948,7 +948,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index e6face7c0166..4fc8e578ec7c 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -209,7 +209,7 @@ int userfaultfd_tests(void) wp_addr_range(mem, mem_size); vec_size = mem_size/page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); @@ -247,11 +247,11 @@ int sanity_tests_sd(void) vec_size = num_pages/2; mem_size = num_pages * page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec2 = calloc(vec_size, sizeof(struct page_region)); if (!vec2) ksft_exit_fail_msg("error nomem\n"); @@ -436,7 +436,7 @@ int sanity_tests_sd(void) mem_size = 1050 * page_size; vec_size = mem_size/(page_size*2); - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -491,7 +491,7 @@ int sanity_tests_sd(void) mem_size = 10000 * page_size; vec_size = 50; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -541,7 +541,7 @@ int sanity_tests_sd(void) vec_size = 1000; mem_size = vec_size * page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -695,8 +695,8 @@ int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip) } vec_size = mem_size/page_size; - vec = malloc(sizeof(struct page_region) * vec_size); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); + vec2 = calloc(vec_size, sizeof(struct page_region)); /* 1. all new pages must be not be written (dirty) */ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, @@ -807,8 +807,8 @@ int hpage_unit_tests(void) unsigned long long vec_size = map_size/page_size; struct page_region *vec, *vec2; - vec = malloc(sizeof(struct page_region) * vec_size); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); + vec2 = calloc(vec_size, sizeof(struct page_region)); if (!vec || !vec2) ksft_exit_fail_msg("malloc failed\n"); @@ -997,7 +997,7 @@ int unmapped_region_tests(void) void *start = (void *)0x10000000; int written, len = 0x00040000; long vec_size = len / page_size; - struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); + struct page_region *vec = calloc(vec_size, sizeof(struct page_region)); /* 1. Get written pages */ written = pagemap_ioctl(start, len, vec, vec_size, 0, 0, @@ -1062,7 +1062,7 @@ int sanity_tests(void) mem_size = 10 * page_size; vec_size = mem_size / page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED || vec == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c index 866ac023baf5..88659f0a90ea 100644 --- a/tools/testing/selftests/mm/pfnmap.c +++ b/tools/testing/selftests/mm/pfnmap.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' + * Basic VM_PFNMAP tests relying on mmap() of input file provided. + * Use '/dev/mem' as default. * * Copyright 2025, Red Hat, Inc. * @@ -25,6 +26,7 @@ #include "vm_util.h" static sigjmp_buf sigjmp_buf_env; +static char *file = "/dev/mem"; static void signal_handler(int sig) { @@ -51,7 +53,7 @@ static int test_read_access(char *addr, size_t size, size_t pagesize) return ret; } -static int find_ram_target(off_t *phys_addr, +static int find_ram_target(off_t *offset, unsigned long long pagesize) { unsigned long long start, end; @@ -91,7 +93,7 @@ static int find_ram_target(off_t *phys_addr, /* We need two pages. */ if (end > start + 2 * pagesize) { fclose(file); - *phys_addr = start; + *offset = start; return 0; } } @@ -100,7 +102,7 @@ static int find_ram_target(off_t *phys_addr, FIXTURE(pfnmap) { - off_t phys_addr; + off_t offset; size_t pagesize; int dev_mem_fd; char *addr1; @@ -113,23 +115,31 @@ FIXTURE_SETUP(pfnmap) { self->pagesize = getpagesize(); - /* We'll require two physical pages throughout our tests ... */ - if (find_ram_target(&self->phys_addr, self->pagesize)) - SKIP(return, "Cannot find ram target in '/proc/iomem'\n"); + if (strncmp(file, "/dev/mem", strlen("/dev/mem")) == 0) { + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->offset, self->pagesize)) + SKIP(return, + "Cannot find ram target in '/proc/iomem'\n"); + } else { + self->offset = 0; + } - self->dev_mem_fd = open("/dev/mem", O_RDONLY); + self->dev_mem_fd = open(file, O_RDONLY); if (self->dev_mem_fd < 0) - SKIP(return, "Cannot open '/dev/mem'\n"); + SKIP(return, "Cannot open '%s'\n", file); self->size1 = self->pagesize * 2; self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, - self->dev_mem_fd, self->phys_addr); + self->dev_mem_fd, self->offset); if (self->addr1 == MAP_FAILED) - SKIP(return, "Cannot mmap '/dev/mem'\n"); + SKIP(return, "Cannot mmap '%s'\n", file); + + if (!check_vmflag_pfnmap(self->addr1)) + SKIP(return, "Invalid file: '%s'. Not pfnmap'ed\n", file); /* ... and want to be able to read from them. */ if (test_read_access(self->addr1, self->size1, self->pagesize)) - SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); + SKIP(return, "Cannot read-access mmap'ed '%s'\n", file); self->size2 = 0; self->addr2 = MAP_FAILED; @@ -182,7 +192,7 @@ TEST_F(pfnmap, munmap_split) */ self->size2 = self->pagesize; self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, - self->dev_mem_fd, self->phys_addr); + self->dev_mem_fd, self->offset); ASSERT_NE(self->addr2, MAP_FAILED); } @@ -246,4 +256,14 @@ TEST_F(pfnmap, fork) ASSERT_EQ(ret, 0); } -TEST_HARNESS_MAIN +int main(int argc, char **argv) +{ + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--") == 0) { + if (i + 1 < argc && strlen(argv[i + 1]) > 0) + file = argv[i + 1]; + return test_harness_run(i, argv); + } + } + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index ea404f80e6cb..fa15f006fa68 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -84,9 +84,6 @@ extern void abort_hooks(void); #ifndef noinline # define noinline __attribute__((noinline)) #endif -#ifndef __maybe_unused -# define __maybe_unused __attribute__((__unused__)) -#endif int sys_pkey_alloc(unsigned long flags, unsigned long init_val); int sys_pkey_free(unsigned long pkey); diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index b5e076a564c9..302fef54049c 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -41,7 +41,7 @@ static siginfo_t siginfo = {0}; * syscall will attempt to access the PLT in order to call a library function * which is protected by MPK 0 which we don't have access to. */ -static inline __always_inline +static __always_inline long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) { unsigned long ret; diff --git a/tools/testing/selftests/mm/prctl_thp_disable.c b/tools/testing/selftests/mm/prctl_thp_disable.c new file mode 100644 index 000000000000..84b4a4b345af --- /dev/null +++ b/tools/testing/selftests/mm/prctl_thp_disable.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic tests for PR_GET/SET_THP_DISABLE prctl calls + * + * Author(s): Usama Arif <usamaarif642@gmail.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/mman.h> +#include <sys/prctl.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "thp_settings.h" +#include "vm_util.h" + +#ifndef PR_THP_DISABLE_EXCEPT_ADVISED +#define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) +#endif + +enum thp_collapse_type { + THP_COLLAPSE_NONE, + THP_COLLAPSE_MADV_NOHUGEPAGE, + THP_COLLAPSE_MADV_HUGEPAGE, /* MADV_HUGEPAGE before access */ + THP_COLLAPSE_MADV_COLLAPSE, /* MADV_COLLAPSE after access */ +}; + +/* + * Function to mmap a buffer, fault it in, madvise it appropriately (before + * page fault for MADV_HUGE, and after for MADV_COLLAPSE), and check if the + * mmap region is huge. + * Returns: + * 0 if test doesn't give hugepage + * 1 if test gives a hugepage + * -errno if mmap fails + */ +static int test_mmap_thp(enum thp_collapse_type madvise_buf, size_t pmdsize) +{ + char *mem, *mmap_mem; + size_t mmap_size; + int ret; + + /* For alignment purposes, we need twice the THP size. */ + mmap_size = 2 * pmdsize; + mmap_mem = (char *)mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) + return -errno; + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); + + if (madvise_buf == THP_COLLAPSE_MADV_HUGEPAGE) + madvise(mem, pmdsize, MADV_HUGEPAGE); + else if (madvise_buf == THP_COLLAPSE_MADV_NOHUGEPAGE) + madvise(mem, pmdsize, MADV_NOHUGEPAGE); + + /* Ensure memory is allocated */ + memset(mem, 1, pmdsize); + + if (madvise_buf == THP_COLLAPSE_MADV_COLLAPSE) + madvise(mem, pmdsize, MADV_COLLAPSE); + + /* HACK: make sure we have a separate VMA that we can check reliably. */ + mprotect(mem, pmdsize, PROT_READ); + + ret = check_huge_anon(mem, 1, pmdsize); + munmap(mmap_mem, mmap_size); + return ret; +} + +static void prctl_thp_disable_completely_test(struct __test_metadata *const _metadata, + size_t pmdsize, + enum thp_enabled thp_policy) +{ + ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 1); + + /* tests after prctl overrides global policy */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 0); + + /* Reset to global policy */ + ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0); + + /* tests after prctl is cleared, and only global policy is effective */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), + thp_policy == THP_ALWAYS ? 1 : 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); +} + +FIXTURE(prctl_thp_disable_completely) +{ + struct thp_settings settings; + size_t pmdsize; +}; + +FIXTURE_VARIANT(prctl_thp_disable_completely) +{ + enum thp_enabled thp_policy; +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, never) +{ + .thp_policy = THP_NEVER, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, madvise) +{ + .thp_policy = THP_MADVISE, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, always) +{ + .thp_policy = THP_ALWAYS, +}; + +FIXTURE_SETUP(prctl_thp_disable_completely) +{ + if (!thp_available()) + SKIP(return, "Transparent Hugepages not available\n"); + + self->pmdsize = read_pmd_pagesize(); + if (!self->pmdsize) + SKIP(return, "Unable to read PMD size\n"); + + if (prctl(PR_SET_THP_DISABLE, 1, NULL, NULL, NULL)) + SKIP(return, "Unable to disable THPs completely for the process\n"); + + thp_save_settings(); + thp_read_settings(&self->settings); + self->settings.thp_enabled = variant->thp_policy; + self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT; + thp_write_settings(&self->settings); +} + +FIXTURE_TEARDOWN(prctl_thp_disable_completely) +{ + thp_restore_settings(); +} + +TEST_F(prctl_thp_disable_completely, nofork) +{ + prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy); +} + +TEST_F(prctl_thp_disable_completely, fork) +{ + int ret = 0; + pid_t pid; + + /* Make sure prctl changes are carried across fork */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy); + return; + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +static void prctl_thp_disable_except_madvise_test(struct __test_metadata *const _metadata, + size_t pmdsize, + enum thp_enabled thp_policy) +{ + ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 3); + + /* tests after prctl overrides global policy */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); + + /* Reset to global policy */ + ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0); + + /* tests after prctl is cleared, and only global policy is effective */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), + thp_policy == THP_ALWAYS ? 1 : 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); +} + +FIXTURE(prctl_thp_disable_except_madvise) +{ + struct thp_settings settings; + size_t pmdsize; +}; + +FIXTURE_VARIANT(prctl_thp_disable_except_madvise) +{ + enum thp_enabled thp_policy; +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, never) +{ + .thp_policy = THP_NEVER, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, madvise) +{ + .thp_policy = THP_MADVISE, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, always) +{ + .thp_policy = THP_ALWAYS, +}; + +FIXTURE_SETUP(prctl_thp_disable_except_madvise) +{ + if (!thp_available()) + SKIP(return, "Transparent Hugepages not available\n"); + + self->pmdsize = read_pmd_pagesize(); + if (!self->pmdsize) + SKIP(return, "Unable to read PMD size\n"); + + if (prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, NULL, NULL)) + SKIP(return, "Unable to set PR_THP_DISABLE_EXCEPT_ADVISED\n"); + + thp_save_settings(); + thp_read_settings(&self->settings); + self->settings.thp_enabled = variant->thp_policy; + self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT; + thp_write_settings(&self->settings); +} + +FIXTURE_TEARDOWN(prctl_thp_disable_except_madvise) +{ + thp_restore_settings(); +} + +TEST_F(prctl_thp_disable_except_madvise, nofork) +{ + prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize, variant->thp_policy); +} + +TEST_F(prctl_thp_disable_except_madvise, fork) +{ + int ret = 0; + pid_t pid; + + /* Make sure prctl changes are carried across fork */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize, + variant->thp_policy); + return; + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 23ebec367015..2085982dba69 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -557,13 +557,11 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, int nr_iterations = random() % 100; int ret; - while (0) { + while (nr_iterations-- >= 0) { int rpkey = alloc_random_pkey(); ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", ptr, size, orig_prot, pkey, ret); - if (nr_iterations-- < 0) - break; dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" " shadow: 0x%016llx\n", @@ -1304,7 +1302,7 @@ static void test_mprotect_with_pkey_0(int *ptr, u16 pkey) static void test_ptrace_of_child(int *ptr, u16 pkey) { - __attribute__((__unused__)) int peek_result; + __always_unused int peek_result; pid_t child_pid; void *ignored = 0; long ret; diff --git a/tools/testing/selftests/mm/rmap.c b/tools/testing/selftests/mm/rmap.c new file mode 100644 index 000000000000..13f7bccfd0a9 --- /dev/null +++ b/tools/testing/selftests/mm/rmap.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RMAP functional tests + * + * Author(s): Wei Yang <richard.weiyang@gmail.com> + */ + +#include "../kselftest_harness.h" +#include <strings.h> +#include <pthread.h> +#include <numa.h> +#include <numaif.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <signal.h> +#include <time.h> +#include <sys/sem.h> +#include <unistd.h> +#include <fcntl.h> + +#include "vm_util.h" + +#define TOTAL_LEVEL 5 +#define MAX_CHILDREN 3 + +#define FAIL_ON_CHECK (1 << 0) +#define FAIL_ON_WORK (1 << 1) + +struct sembuf sem_wait = {0, -1, 0}; +struct sembuf sem_signal = {0, 1, 0}; + +enum backend_type { + ANON, + SHM, + NORM_FILE, +}; + +#define PREFIX "kst_rmap" +#define MAX_FILENAME_LEN 256 +const char *suffixes[] = { + "", + "_shm", + "_file", +}; + +struct global_data; +typedef int (*work_fn)(struct global_data *data); +typedef int (*check_fn)(struct global_data *data); +typedef void (*prepare_fn)(struct global_data *data); + +struct global_data { + int worker_level; + + int semid; + int pipefd[2]; + + unsigned int mapsize; + unsigned int rand_seed; + char *region; + + prepare_fn do_prepare; + work_fn do_work; + check_fn do_check; + + enum backend_type backend; + char filename[MAX_FILENAME_LEN]; + + unsigned long *expected_pfn; +}; + +/* + * Create a process tree with TOTAL_LEVEL height and at most MAX_CHILDREN + * children for each. + * + * It will randomly select one process as 'worker' process which will + * 'do_work' until all processes are created. And all other processes will + * wait until 'worker' finish its work. + */ +void propagate_children(struct __test_metadata *_metadata, struct global_data *data) +{ + pid_t root_pid, pid; + unsigned int num_child; + int status; + int ret = 0; + int curr_child, worker_child; + int curr_level = 1; + bool is_worker = true; + + root_pid = getpid(); +repeat: + num_child = rand_r(&data->rand_seed) % MAX_CHILDREN + 1; + worker_child = is_worker ? rand_r(&data->rand_seed) % num_child : -1; + + for (curr_child = 0; curr_child < num_child; curr_child++) { + pid = fork(); + + if (pid < 0) { + perror("Error: fork\n"); + } else if (pid == 0) { + curr_level++; + + if (curr_child != worker_child) + is_worker = false; + + if (curr_level == TOTAL_LEVEL) + break; + + data->rand_seed += curr_child; + goto repeat; + } + } + + if (data->do_prepare) + data->do_prepare(data); + + close(data->pipefd[1]); + + if (is_worker && curr_level == data->worker_level) { + /* This is the worker process, first wait last process created */ + char buf; + + while (read(data->pipefd[0], &buf, 1) > 0) + ; + + if (data->do_work) + ret = data->do_work(data); + + /* Kick others */ + semctl(data->semid, 0, IPC_RMID); + } else { + /* Wait worker finish */ + semop(data->semid, &sem_wait, 1); + if (data->do_check) + ret = data->do_check(data); + } + + /* Wait all child to quit */ + while (wait(&status) > 0) { + if (WIFEXITED(status)) + ret |= WEXITSTATUS(status); + } + + if (getpid() == root_pid) { + if (ret & FAIL_ON_WORK) + SKIP(return, "Failed in worker"); + + ASSERT_EQ(ret, 0); + } else { + exit(ret); + } +} + +FIXTURE(migrate) +{ + struct global_data data; +}; + +FIXTURE_SETUP(migrate) +{ + struct global_data *data = &self->data; + + if (numa_available() < 0) + SKIP(return, "NUMA not available"); + if (numa_bitmask_weight(numa_all_nodes_ptr) <= 1) + SKIP(return, "Not enough NUMA nodes available"); + + data->mapsize = getpagesize(); + + data->expected_pfn = mmap(0, sizeof(unsigned long), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(data->expected_pfn, MAP_FAILED); + + /* Prepare semaphore */ + data->semid = semget(IPC_PRIVATE, 1, 0666 | IPC_CREAT); + ASSERT_NE(data->semid, -1); + ASSERT_NE(semctl(data->semid, 0, SETVAL, 0), -1); + + /* Prepare pipe */ + ASSERT_NE(pipe(data->pipefd), -1); + + data->rand_seed = time(NULL); + srand(data->rand_seed); + + data->worker_level = rand() % TOTAL_LEVEL + 1; + + data->do_prepare = NULL; + data->do_work = NULL; + data->do_check = NULL; + + data->backend = ANON; +}; + +FIXTURE_TEARDOWN(migrate) +{ + struct global_data *data = &self->data; + + if (data->region != MAP_FAILED) + munmap(data->region, data->mapsize); + data->region = MAP_FAILED; + if (data->expected_pfn != MAP_FAILED) + munmap(data->expected_pfn, sizeof(unsigned long)); + data->expected_pfn = MAP_FAILED; + semctl(data->semid, 0, IPC_RMID); + data->semid = -1; + + close(data->pipefd[0]); + + switch (data->backend) { + case ANON: + break; + case SHM: + shm_unlink(data->filename); + break; + case NORM_FILE: + unlink(data->filename); + break; + } +} + +void access_region(struct global_data *data) +{ + /* + * Force read "region" to make sure page fault in. + */ + FORCE_READ(*data->region); +} + +int try_to_move_page(char *region) +{ + int ret; + int node; + int status = 0; + int failures = 0; + + ret = move_pages(0, 1, (void **)®ion, NULL, &status, MPOL_MF_MOVE_ALL); + if (ret != 0) { + perror("Failed to get original numa"); + return FAIL_ON_WORK; + } + + /* Pick up a different target node */ + for (node = 0; node <= numa_max_node(); node++) { + if (numa_bitmask_isbitset(numa_all_nodes_ptr, node) && node != status) + break; + } + + if (node > numa_max_node()) { + ksft_print_msg("Couldn't find available numa node for testing\n"); + return FAIL_ON_WORK; + } + + while (1) { + ret = move_pages(0, 1, (void **)®ion, &node, &status, MPOL_MF_MOVE_ALL); + + /* migrate successfully */ + if (!ret) + break; + + /* error happened */ + if (ret < 0) { + ksft_perror("Failed to move pages"); + return FAIL_ON_WORK; + } + + /* migration is best effort; try again */ + if (++failures >= 100) + return FAIL_ON_WORK; + } + + return 0; +} + +int move_region(struct global_data *data) +{ + int ret; + int pagemap_fd; + + ret = try_to_move_page(data->region); + if (ret != 0) + return ret; + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_WORK; + *data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region); + + return 0; +} + +int has_same_pfn(struct global_data *data) +{ + unsigned long pfn; + int pagemap_fd; + + if (data->region == MAP_FAILED) + return 0; + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_CHECK; + + pfn = pagemap_get_pfn(pagemap_fd, data->region); + if (pfn != *data->expected_pfn) + return FAIL_ON_CHECK; + + return 0; +} + +TEST_F(migrate, anon) +{ + struct global_data *data = &self->data; + + /* Map an area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_F(migrate, shm) +{ + int shm_fd; + struct global_data *data = &self->data; + + snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[SHM]); + shm_fd = shm_open(data->filename, O_CREAT | O_RDWR, 0666); + ASSERT_NE(shm_fd, -1); + ftruncate(shm_fd, data->mapsize); + data->backend = SHM; + + /* Map a shared area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_SHARED, shm_fd, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + close(shm_fd); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_F(migrate, file) +{ + int fd; + struct global_data *data = &self->data; + + snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[NORM_FILE]); + fd = open(data->filename, O_CREAT | O_RDWR | O_EXCL, 0666); + ASSERT_NE(fd, -1); + ftruncate(fd, data->mapsize); + data->backend = NORM_FILE; + + /* Map a shared area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + close(fd); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +void prepare_local_region(struct global_data *data) +{ + /* Allocate range and set the same data */ + data->region = mmap(NULL, data->mapsize, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (data->region == MAP_FAILED) + return; + + memset(data->region, 0xcf, data->mapsize); +} + +int merge_and_migrate(struct global_data *data) +{ + int pagemap_fd; + int ret = 0; + + if (data->region == MAP_FAILED) + return FAIL_ON_WORK; + + if (ksm_start() < 0) + return FAIL_ON_WORK; + + ret = try_to_move_page(data->region); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_WORK; + *data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region); + + return ret; +} + +TEST_F(migrate, ksm) +{ + int ret; + struct global_data *data = &self->data; + + if (ksm_stop() < 0) + SKIP(return, "accessing \"/sys/kernel/mm/ksm/run\") failed"); + if (ksm_get_full_scans() < 0) + SKIP(return, "accessing \"/sys/kernel/mm/ksm/full_scan\") failed"); + + ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (ret < 0 && errno == EINVAL) + SKIP(return, "PR_SET_MEMORY_MERGE not supported"); + else if (ret) + ksft_exit_fail_perror("PR_SET_MEMORY_MERGE=1 failed"); + + data->do_prepare = prepare_local_region; + data->do_work = merge_and_migrate; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 471e539d82b8..d9173f2312b7 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -85,6 +85,8 @@ separated by spaces: test handling of page fragment allocation and freeing - vma_merge test VMA merge cases behave as expected +- rmap + test rmap behaves as expected example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -136,7 +138,7 @@ run_gup_matrix() { # -n: How many pages to fetch together? 512 is special # because it's default thp size (or 2M on x86), 123 to # just test partial gup when hit a huge in whatever form - for num in "-n 1" "-n 512" "-n 123"; do + for num in "-n 1" "-n 512" "-n 123" "-n -1"; do CATEGORY="gup_test" run_test ./gup_test \ $huge $test_cmd $write $share $num done @@ -172,13 +174,13 @@ fi # set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then - nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) + orig_nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) needpgs=$((needmem_KB / hpgsize_KB)) tries=2 while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do lackpgs=$((needpgs - freepgs)) echo 3 > /proc/sys/vm/drop_caches - if ! echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then + if ! echo $((lackpgs + orig_nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then echo "Please run this test as root" exit $ksft_skip fi @@ -189,6 +191,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then done < /proc/meminfo tries=$((tries - 1)) done + nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) if [ "$freepgs" -lt "$needpgs" ]; then printf "Not enough huge pages available (%d < %d)\n" \ "$freepgs" "$needpgs" @@ -311,9 +314,11 @@ if $RUN_ALL; then run_gup_matrix else # get_user_pages_fast() benchmark - CATEGORY="gup_test" run_test ./gup_test -u + CATEGORY="gup_test" run_test ./gup_test -u -n 1 + CATEGORY="gup_test" run_test ./gup_test -u -n -1 # pin_user_pages_fast() benchmark - CATEGORY="gup_test" run_test ./gup_test -a + CATEGORY="gup_test" run_test ./gup_test -a -n 1 + CATEGORY="gup_test" run_test ./gup_test -a -n -1 fi # Dump pages 0, 19, and 4096, using pin_user_pages: CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 @@ -322,11 +327,15 @@ CATEGORY="gup_test" run_test ./gup_longterm CATEGORY="userfaultfd" run_test ./uffd-unit-tests uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 -# Hugetlb tests require source and destination huge pages. Pass in half -# the size of the free pages we have, which is used for *each*. +# Hugetlb tests require source and destination huge pages. Pass in almost half +# the size of the free pages we have, which is used for *each*. An adjustment +# of (nr_parallel - 1) is done (see nr_parallel in uffd-stress.c) to have some +# extra hugepages - this is done to prevent the test from failing by racily +# reserving more hugepages than strictly required. # uffd-stress expects a region expressed in MiB, so we adjust # half_ufd_size_MB accordingly. -half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2)) +adjustment=$(( (31 < (nr_cpus - 1)) ? 31 : (nr_cpus - 1) )) +half_ufd_size_MB=$((((freepgs - adjustment) * hpgsize_KB) / 1024 / 2)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 @@ -532,6 +541,12 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh aligned CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned +CATEGORY="rmap" run_test ./rmap + +if [ "${HAVE_HUGEPAGES}" = 1 ]; then + echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages +fi + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix echo "1..${count_total}" | tap_output diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index 8a3f2b4b2186..4ee4db3750c1 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -200,8 +200,11 @@ int main(int argc, char **argv) int pagesize; ksft_print_header(); - ksft_set_plan(15); + if (!softdirty_supported()) + ksft_exit_skip("soft-dirty is not support\n"); + + ksft_set_plan(15); pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY); if (pagemap_fd < 0) ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 44a3f8a58806..743af3c05190 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -25,6 +25,8 @@ uint64_t pagesize; unsigned int pageshift; uint64_t pmd_pagesize; +unsigned int pmd_order; +int *expected_orders; #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" #define SMAP_PATH "/proc/self/smaps" @@ -34,28 +36,225 @@ uint64_t pmd_pagesize; #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" #define PATH_FMT "%s,0x%lx,0x%lx,%d" -#define PFN_MASK ((1UL<<55)-1) -#define KPF_THP (1UL<<22) +const char *pagemap_proc = "/proc/self/pagemap"; +const char *kpageflags_proc = "/proc/kpageflags"; +int pagemap_fd; +int kpageflags_fd; -int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) +static bool is_backed_by_folio(char *vaddr, int order, int pagemap_fd, + int kpageflags_fd) { - uint64_t paddr; - uint64_t page_flags; + const uint64_t folio_head_flags = KPF_THP | KPF_COMPOUND_HEAD; + const uint64_t folio_tail_flags = KPF_THP | KPF_COMPOUND_TAIL; + const unsigned long nr_pages = 1UL << order; + unsigned long pfn_head; + uint64_t pfn_flags; + unsigned long pfn; + unsigned long i; + + pfn = pagemap_get_pfn(pagemap_fd, vaddr); + + /* non present page */ + if (pfn == -1UL) + return false; + + if (pageflags_get(pfn, kpageflags_fd, &pfn_flags)) + goto fail; + + /* check for order-0 pages */ + if (!order) { + if (pfn_flags & (folio_head_flags | folio_tail_flags)) + return false; + return true; + } + + /* non THP folio */ + if (!(pfn_flags & KPF_THP)) + return false; + + pfn_head = pfn & ~(nr_pages - 1); + + if (pageflags_get(pfn_head, kpageflags_fd, &pfn_flags)) + goto fail; + + /* head PFN has no compound_head flag set */ + if ((pfn_flags & folio_head_flags) != folio_head_flags) + return false; + + /* check all tail PFN flags */ + for (i = 1; i < nr_pages; i++) { + if (pageflags_get(pfn_head + i, kpageflags_fd, &pfn_flags)) + goto fail; + if ((pfn_flags & folio_tail_flags) != folio_tail_flags) + return false; + } + + /* + * check the PFN after this folio, but if its flags cannot be obtained, + * assume this folio has the expected order + */ + if (pageflags_get(pfn_head + nr_pages, kpageflags_fd, &pfn_flags)) + return true; + + /* If we find another tail page, then the folio is larger. */ + return (pfn_flags & folio_tail_flags) != folio_tail_flags; +fail: + ksft_exit_fail_msg("Failed to get folio info\n"); + return false; +} + +static int vaddr_pageflags_get(char *vaddr, int pagemap_fd, int kpageflags_fd, + uint64_t *flags) +{ + unsigned long pfn; + + pfn = pagemap_get_pfn(pagemap_fd, vaddr); + + /* non-present PFN */ + if (pfn == -1UL) + return 1; + + if (pageflags_get(pfn, kpageflags_fd, flags)) + return -1; + + return 0; +} + +/* + * gather_after_split_folio_orders - scan through [vaddr_start, len) and record + * folio orders + * + * @vaddr_start: start vaddr + * @len: range length + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap + * @kpageflags_fd: file descriptor to /proc/kpageflags + * @orders: output folio order array + * @nr_orders: folio order array size + * + * gather_after_split_folio_orders() scan through [vaddr_start, len) and check + * all folios within the range and record their orders. All order-0 pages will + * be recorded. Non-present vaddr is skipped. + * + * NOTE: the function is used to check folio orders after a split is performed, + * so it assumes [vaddr_start, len) fully maps to after-split folios within that + * range. + * + * Return: 0 - no error, -1 - unhandled cases + */ +static int gather_after_split_folio_orders(char *vaddr_start, size_t len, + int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders) +{ + uint64_t page_flags = 0; + int cur_order = -1; + char *vaddr; + + if (pagemap_fd == -1 || kpageflags_fd == -1) + return -1; + if (!orders) + return -1; + if (nr_orders <= 0) + return -1; + + for (vaddr = vaddr_start; vaddr < vaddr_start + len;) { + char *next_folio_vaddr; + int status; + + status = vaddr_pageflags_get(vaddr, pagemap_fd, kpageflags_fd, + &page_flags); + if (status < 0) + return -1; + + /* skip non present vaddr */ + if (status == 1) { + vaddr += psize(); + continue; + } - if (pagemap_file) { - pread(pagemap_file, &paddr, sizeof(paddr), - ((long)vaddr >> pageshift) * sizeof(paddr)); + /* all order-0 pages with possible false postive (non folio) */ + if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { + orders[0]++; + vaddr += psize(); + continue; + } - if (kpageflags_file) { - pread(kpageflags_file, &page_flags, sizeof(page_flags), - (paddr & PFN_MASK) * sizeof(page_flags)); + /* skip non thp compound pages */ + if (!(page_flags & KPF_THP)) { + vaddr += psize(); + continue; + } - return !!(page_flags & KPF_THP); + /* vpn points to part of a THP at this point */ + if (page_flags & KPF_COMPOUND_HEAD) + cur_order = 1; + else { + vaddr += psize(); + continue; } + + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); + + if (next_folio_vaddr >= vaddr_start + len) + break; + + while ((status = vaddr_pageflags_get(next_folio_vaddr, + pagemap_fd, kpageflags_fd, + &page_flags)) >= 0) { + /* + * non present vaddr, next compound head page, or + * order-0 page + */ + if (status == 1 || + (page_flags & KPF_COMPOUND_HEAD) || + !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { + if (cur_order < nr_orders) { + orders[cur_order]++; + cur_order = -1; + vaddr = next_folio_vaddr; + } + break; + } + + cur_order++; + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); + } + + if (status < 0) + return status; } + if (cur_order > 0 && cur_order < nr_orders) + orders[cur_order]++; return 0; } +static int check_after_split_folio_orders(char *vaddr_start, size_t len, + int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders) +{ + int *vaddr_orders; + int status; + int i; + + vaddr_orders = (int *)malloc(sizeof(int) * nr_orders); + + if (!vaddr_orders) + ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders"); + + memset(vaddr_orders, 0, sizeof(int) * nr_orders); + status = gather_after_split_folio_orders(vaddr_start, len, pagemap_fd, + kpageflags_fd, vaddr_orders, nr_orders); + if (status) + ksft_exit_fail_msg("gather folio info failed\n"); + + for (i = 0; i < nr_orders; i++) + if (vaddr_orders[i] != orders[i]) { + ksft_print_msg("order %d: expected: %d got %d\n", i, + orders[i], vaddr_orders[i]); + status = -1; + } + + free(vaddr_orders); + return status; +} + static void write_file(const char *path, const char *buf, size_t buflen) { int fd; @@ -111,7 +310,7 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp unsigned long rss_anon_before, rss_anon_after; size_t i; - if (!check_huge_anon(one_page, 4, pmd_pagesize)) + if (!check_huge_anon(one_page, nr_hpages, pmd_pagesize)) ksft_exit_fail_msg("No THP is allocated\n"); rss_anon_before = rss_anon(); @@ -135,7 +334,7 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp rss_anon_before, rss_anon_after); } -void split_pmd_zero_pages(void) +static void split_pmd_zero_pages(void) { char *one_page; int nr_hpages = 4; @@ -147,7 +346,7 @@ void split_pmd_zero_pages(void) free(one_page); } -void split_pmd_thp_to_order(int order) +static void split_pmd_thp_to_order(int order) { char *one_page; size_t len = 4 * pmd_pagesize; @@ -173,6 +372,13 @@ void split_pmd_thp_to_order(int order) if (one_page[i] != (char)i) ksft_exit_fail_msg("%ld byte corrupted\n", i); + memset(expected_orders, 0, sizeof(int) * (pmd_order + 1)); + expected_orders[order] = 4 << (pmd_order - order); + + if (check_after_split_folio_orders(one_page, len, pagemap_fd, + kpageflags_fd, expected_orders, + (pmd_order + 1))) + ksft_exit_fail_msg("Unexpected THP split\n"); if (!check_huge_anon(one_page, 0, pmd_pagesize)) ksft_exit_fail_msg("Still AnonHugePages not split\n"); @@ -181,90 +387,97 @@ void split_pmd_thp_to_order(int order) free(one_page); } -void split_pte_mapped_thp(void) +static void split_pte_mapped_thp(void) { - char *one_page, *pte_mapped, *pte_mapped2; - size_t len = 4 * pmd_pagesize; - uint64_t thp_size; + const size_t nr_thps = 4; + const size_t thp_area_size = nr_thps * pmd_pagesize; + const size_t page_area_size = nr_thps * pagesize; + char *thp_area, *tmp, *page_area = MAP_FAILED; size_t i; - const char *pagemap_template = "/proc/%d/pagemap"; - const char *kpageflags_proc = "/proc/kpageflags"; - char pagemap_proc[255]; - int pagemap_fd; - int kpageflags_fd; - if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) - ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); + thp_area = mmap((void *)(1UL << 30), thp_area_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (thp_area == MAP_FAILED) { + ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno)); + return; + } - pagemap_fd = open(pagemap_proc, O_RDONLY); - if (pagemap_fd == -1) - ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); + madvise(thp_area, thp_area_size, MADV_HUGEPAGE); - kpageflags_fd = open(kpageflags_proc, O_RDONLY); - if (kpageflags_fd == -1) - ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); + for (i = 0; i < thp_area_size; i++) + thp_area[i] = (char)i; - one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (one_page == MAP_FAILED) - ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); + if (!check_huge_anon(thp_area, nr_thps, pmd_pagesize)) { + ksft_test_result_skip("Not all THPs allocated\n"); + goto out; + } - madvise(one_page, len, MADV_HUGEPAGE); + /* + * To challenge spitting code, we will mremap a single page of each + * THP (page[i] of thp[i]) in the thp_area into page_area. This will + * replace the PMD mappings in the thp_area by PTE mappings first, + * but leaving the THP unsplit, to then create a page-sized hole in + * the thp_area. + * We will then manually trigger splitting of all THPs through the + * single mremap'ed pages of each THP in the page_area. + */ + page_area = mmap(NULL, page_area_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (page_area == MAP_FAILED) { + ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno)); + goto out; + } - for (i = 0; i < len; i++) - one_page[i] = (char)i; + for (i = 0; i < nr_thps; i++) { + tmp = mremap(thp_area + pmd_pagesize * i + pagesize * i, + pagesize, pagesize, MREMAP_MAYMOVE|MREMAP_FIXED, + page_area + pagesize * i); + if (tmp != MAP_FAILED) + continue; + ksft_test_result_fail("mremap failed: %s\n", strerror(errno)); + goto out; + } - if (!check_huge_anon(one_page, 4, pmd_pagesize)) - ksft_exit_fail_msg("No THP is allocated\n"); + /* + * Verify that our THPs were not split yet. Note that + * check_huge_anon() cannot be used as it checks for PMD mappings. + */ + for (i = 0; i < nr_thps; i++) { + if (is_backed_by_folio(page_area + i * pagesize, pmd_order, + pagemap_fd, kpageflags_fd)) + continue; + ksft_test_result_fail("THP %zu missing after mremap\n", i); + goto out; + } - /* remap the first pagesize of first THP */ - pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); - - /* remap the Nth pagesize of Nth THP */ - for (i = 1; i < 4; i++) { - pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, - pagesize, pagesize, - MREMAP_MAYMOVE|MREMAP_FIXED, - pte_mapped + pagesize * i); - if (pte_mapped2 == MAP_FAILED) - ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); - } - - /* smap does not show THPs after mremap, use kpageflags instead */ - thp_size = 0; - for (i = 0; i < pagesize * 4; i++) - if (i % pagesize == 0 && - is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) - thp_size++; - - if (thp_size != 4) - ksft_exit_fail_msg("Some THPs are missing during mremap\n"); - - /* split all remapped THPs */ - write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, - (uint64_t)pte_mapped + pagesize * 4, 0); - - /* smap does not show THPs after mremap, use kpageflags instead */ - thp_size = 0; - for (i = 0; i < pagesize * 4; i++) { - if (pte_mapped[i] != (char)i) - ksft_exit_fail_msg("%ld byte corrupted\n", i); + /* Split all THPs through the remapped pages. */ + write_debugfs(PID_FMT, getpid(), (uint64_t)page_area, + (uint64_t)page_area + page_area_size, 0); - if (i % pagesize == 0 && - is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) - thp_size++; + /* Corruption during mremap or split? */ + for (i = 0; i < page_area_size; i++) { + if (page_area[i] == (char)i) + continue; + ksft_test_result_fail("%zu byte corrupted\n", i); + goto out; } - if (thp_size) - ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); + /* Split failed? */ + for (i = 0; i < nr_thps; i++) { + if (is_backed_by_folio(page_area + i * pagesize, 0, + pagemap_fd, kpageflags_fd)) + continue; + ksft_test_result_fail("THP %zu not split\n", i); + } ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); - munmap(one_page, len); - close(pagemap_fd); - close(kpageflags_fd); +out: + munmap(thp_area, thp_area_size); + if (page_area != MAP_FAILED) + munmap(page_area, page_area_size); } -void split_file_backed_thp(int order) +static void split_file_backed_thp(int order) { int status; int fd; @@ -297,7 +510,7 @@ void split_file_backed_thp(int order) status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); if (status >= INPUT_MAX) { - ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); + ksft_print_msg("Fail to create file-backed THP split testing file\n"); goto cleanup; } @@ -366,7 +579,7 @@ out: ksft_exit_fail_msg("Error occurred\n"); } -bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, +static bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, const char **thp_fs_loc) { if (xfs_path) { @@ -382,7 +595,7 @@ bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, return true; } -void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) +static void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) { int status; @@ -395,8 +608,8 @@ void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) strerror(errno)); } -int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, - char **addr) +static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, + int *fd, char **addr) { size_t i; unsigned char buf[1024]; @@ -462,10 +675,11 @@ err_out_unlink: return -1; } -void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, - int order, int offset) +static void split_thp_in_pagecache_to_order_at(size_t fd_size, + const char *fs_loc, int order, int offset) { int fd; + char *split_addr; char *addr; size_t i; char testfile[INPUT_MAX]; @@ -479,14 +693,33 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); if (err) return; + err = 0; - if (offset == -1) - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, - (uint64_t)addr + fd_size, order); - else - write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, - (uint64_t)addr + fd_size, order, offset); + memset(expected_orders, 0, sizeof(int) * (pmd_order + 1)); + /* + * use [split_addr, split_addr + pagesize) range to split THPs, since + * the debugfs function always split a range with pagesize step and + * providing a full [addr, addr + fd_size) range can trigger multiple + * splits, complicating after-split result checking. + */ + if (offset == -1) { + for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize) + write_debugfs(PID_FMT, getpid(), (uint64_t)split_addr, + (uint64_t)split_addr + pagesize, order); + + expected_orders[order] = fd_size / (pagesize << order); + } else { + int times = fd_size / pmd_pagesize; + + for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize) + write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)split_addr, + (uint64_t)split_addr + pagesize, order, offset); + + for (i = order + 1; i < pmd_order; i++) + expected_orders[i] = times; + expected_orders[order] = 2 * times; + } for (i = 0; i < fd_size; i++) if (*(addr + i) != (char)i) { @@ -495,6 +728,14 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, goto out; } + if (check_after_split_folio_orders(addr, fd_size, pagemap_fd, + kpageflags_fd, expected_orders, + (pmd_order + 1))) { + ksft_print_msg("Unexpected THP split\n"); + err = 1; + goto out; + } + if (!check_huge_file(addr, 0, pmd_pagesize)) { ksft_print_msg("Still FilePmdMapped not split\n"); err = EXIT_FAILURE; @@ -525,6 +766,8 @@ int main(int argc, char **argv) const char *fs_loc; bool created_tmp; int offset; + unsigned int nr_pages; + unsigned int tests; ksft_print_header(); @@ -536,38 +779,58 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(1+8+1+9+9+8*4+2); - pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; pmd_pagesize = read_pmd_pagesize(); if (!pmd_pagesize) ksft_exit_fail_msg("Reading PMD pagesize failed\n"); + nr_pages = pmd_pagesize / pagesize; + pmd_order = sz2ord(pmd_pagesize, pagesize); + + expected_orders = (int *)malloc(sizeof(int) * (pmd_order + 1)); + if (!expected_orders) + ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); + + tests = 2 + (pmd_order - 1) + (2 * pmd_order) + (pmd_order - 1) * 4 + 2; + ksft_set_plan(tests); + + pagemap_fd = open(pagemap_proc, O_RDONLY); + if (pagemap_fd == -1) + ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); + + kpageflags_fd = open(kpageflags_proc, O_RDONLY); + if (kpageflags_fd == -1) + ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); + fd_size = 2 * pmd_pagesize; split_pmd_zero_pages(); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) if (i != 1) split_pmd_thp_to_order(i); split_pte_mapped_thp(); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) split_file_backed_thp(i); created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, &fs_loc); - for (i = 8; i >= 0; i--) + for (i = pmd_order - 1; i >= 0; i--) split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) for (offset = 0; - offset < pmd_pagesize / pagesize; - offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) + offset < nr_pages; + offset += MAX(nr_pages / 4, 1 << i)) split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); cleanup_thp_fs(fs_loc, created_tmp); + close(pagemap_fd); + close(kpageflags_fd); + free(expected_orders); + ksft_finished(); return 0; diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh index d73b846736f1..d39096723fca 100755 --- a/tools/testing/selftests/mm/test_vmalloc.sh +++ b/tools/testing/selftests/mm/test_vmalloc.sh @@ -47,14 +47,14 @@ check_test_requirements() fi } -run_perfformance_check() +run_performance_check() { echo "Run performance tests to evaluate how fast vmalloc allocation is." echo "It runs all test cases on one single CPU with sequential order." modprobe $DRIVER $PERF_PARAM > /dev/null 2>&1 echo "Done." - echo "Ccheck the kernel message buffer to see the summary." + echo "Check the kernel message buffer to see the summary." } run_stability_check() @@ -160,7 +160,7 @@ function run_test() usage else if [[ "$1" = "performance" ]]; then - run_perfformance_check + run_performance_check elif [[ "$1" = "stress" ]]; then run_stability_check elif [[ "$1" = "smoke" ]]; then diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index bad60ac52874..574bd0f8ae48 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -382,10 +382,17 @@ unsigned long thp_shmem_supported_orders(void) return __thp_supported_orders(true); } -bool thp_is_enabled(void) +bool thp_available(void) { if (access(THP_SYSFS, F_OK) != 0) return false; + return true; +} + +bool thp_is_enabled(void) +{ + if (!thp_available()) + return false; int mode = thp_read_string("enabled", thp_enabled_strings); diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 6c07f70beee9..76eeb712e5f1 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -84,6 +84,7 @@ void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); unsigned long thp_shmem_supported_orders(void); +bool thp_available(void); bool thp_is_enabled(void); #endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 8e2b08dc5762..4f5e290ff1a6 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -177,13 +177,16 @@ void find_pagesizes(void) globfree(&g); read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val); - if (shmmax_val < NUM_PAGES * largest) - ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", - largest * NUM_PAGES); + if (shmmax_val < NUM_PAGES * largest) { + ksft_print_msg("WARNING: shmmax is too small to run this test.\n"); + ksft_print_msg("Please run the following command to increase shmmax:\n"); + ksft_print_msg("echo %lu > /proc/sys/kernel/shmmax\n", largest * NUM_PAGES); + ksft_exit_skip("Test skipped due to insufficient shmmax value.\n"); + } #if defined(__x86_64__) if (largest != 1U<<30) { - ksft_exit_fail_msg("No GB pages available on x86-64\n" + ksft_exit_skip("No GB pages available on x86-64\n" "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES); } #endif diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index a37088a23ffe..994fe8c03923 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -7,18 +7,29 @@ #include "uffd-common.h" -#define BASE_PMD_ADDR ((void *)(1UL << 30)) - -volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -int uffd = -1, uffd_flags, finished, *pipefd, test_type; -bool map_shared; -bool test_uffdio_wp = true; -unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; -atomic_bool ready_for_fork; + +#define BASE_PMD_ADDR ((void *)(1UL << 30)) + +/* pthread_mutex_t starts at page offset 0 */ +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts) +{ + return (pthread_mutex_t *) (area + nr * gopts->page_size); +} + +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +volatile unsigned long long *area_count(char *area, unsigned long nr, + uffd_global_test_opts_t *gopts) +{ + return (volatile unsigned long long *) + ((unsigned long)(area + nr * gopts->page_size + + sizeof(pthread_mutex_t) + sizeof(unsigned long long) - 1) & + ~(unsigned long)(sizeof(unsigned long long) - 1)); +} static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -40,15 +51,15 @@ static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) return mem_fd; } -static void anon_release_pages(char *rel_area) +static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } -static int anon_allocate_area(void **alloc_area, bool is_src) +static int anon_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (*alloc_area == MAP_FAILED) { *alloc_area = NULL; @@ -57,31 +68,32 @@ static int anon_allocate_area(void **alloc_area, bool is_src) return 0; } -static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void noop_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { } -static void hugetlb_release_pages(char *rel_area) +static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (!map_shared) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (!gopts->map_shared) { + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } else { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } } -static int hugetlb_allocate_area(void **alloc_area, bool is_src) +static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - off_t size = nr_pages * page_size; + off_t size = gopts->nr_pages * gopts->page_size; off_t offset = is_src ? 0 : size; void *area_alias = NULL; char **alloc_area_alias; int mem_fd = uffd_mem_fd_create(size * 2, true); *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, - (map_shared ? MAP_SHARED : MAP_PRIVATE) | + (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) | (is_src ? 0 : MAP_NORESERVE), mem_fd, offset); if (*alloc_area == MAP_FAILED) { @@ -89,7 +101,7 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return -errno; } - if (map_shared) { + if (gopts->map_shared) { area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, offset); if (area_alias == MAP_FAILED) @@ -97,9 +109,9 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) } if (is_src) { - alloc_area_alias = &area_src_alias; + alloc_area_alias = &gopts->area_src_alias; } else { - alloc_area_alias = &area_dst_alias; + alloc_area_alias = &gopts->area_dst_alias; } if (area_alias) *alloc_area_alias = area_alias; @@ -108,24 +120,25 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return 0; } -static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void hugetlb_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { - if (!map_shared) + if (!gopts->map_shared) return; - *start = (unsigned long) area_dst_alias + offset; + *start = (unsigned long) gopts->area_dst_alias + offset; } -static void shmem_release_pages(char *rel_area) +static void shmem_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } -static int shmem_allocate_area(void **alloc_area, bool is_src) +static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { void *area_alias = NULL; - size_t bytes = nr_pages * page_size, hpage_size = read_pmd_pagesize(); + size_t bytes = gopts->nr_pages * gopts->page_size, hpage_size = read_pmd_pagesize(); unsigned long offset = is_src ? 0 : bytes; char *p = NULL, *p_alias = NULL; int mem_fd = uffd_mem_fd_create(bytes * 2, false); @@ -159,22 +172,23 @@ static int shmem_allocate_area(void **alloc_area, bool is_src) err("mmap of anonymous memory failed at %p", p_alias); if (is_src) - area_src_alias = area_alias; + gopts->area_src_alias = area_alias; else - area_dst_alias = area_alias; + gopts->area_dst_alias = area_alias; close(mem_fd); return 0; } -static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void shmem_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { - *start = (unsigned long)area_dst_alias + offset; + *start = (unsigned long)gopts->area_dst_alias + offset; } -static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages) +static void shmem_check_pmd_mapping(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages) { - if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, + if (!check_huge_shmem(gopts->area_dst_alias, expect_nr_hpages, read_pmd_pagesize())) err("Did not find expected %d number of hugepages", expect_nr_hpages); @@ -234,18 +248,18 @@ void uffd_stats_report(struct uffd_args *args, int n_cpus) printf("\n"); } -int userfaultfd_open(uint64_t *features) +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features) { struct uffdio_api uffdio_api; - uffd = uffd_open(UFFD_FLAGS); - if (uffd < 0) + gopts->uffd = uffd_open(UFFD_FLAGS); + if (gopts->uffd < 0) return -1; - uffd_flags = fcntl(uffd, F_GETFD, NULL); + gopts->uffd_flags = fcntl(gopts->uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; uffdio_api.features = *features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + if (ioctl(gopts->uffd, UFFDIO_API, &uffdio_api)) /* Probably lack of CAP_PTRACE? */ return -1; if (uffdio_api.api != UFFD_API) @@ -255,59 +269,63 @@ int userfaultfd_open(uint64_t *features) return 0; } -static inline void munmap_area(void **area) +static inline void munmap_area(uffd_global_test_opts_t *gopts, void **area) { if (*area) - if (munmap(*area, nr_pages * page_size)) + if (munmap(*area, gopts->nr_pages * gopts->page_size)) err("munmap"); *area = NULL; } -void uffd_test_ctx_clear(void) +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts) { size_t i; - if (pipefd) { - for (i = 0; i < nr_parallel * 2; ++i) { - if (close(pipefd[i])) + if (gopts->pipefd) { + for (i = 0; i < gopts->nr_parallel * 2; ++i) { + if (close(gopts->pipefd[i])) err("close pipefd"); } - free(pipefd); - pipefd = NULL; + free(gopts->pipefd); + gopts->pipefd = NULL; } - if (count_verify) { - free(count_verify); - count_verify = NULL; + if (gopts->count_verify) { + free(gopts->count_verify); + gopts->count_verify = NULL; } - if (uffd != -1) { - if (close(uffd)) + if (gopts->uffd != -1) { + if (close(gopts->uffd)) err("close uffd"); - uffd = -1; + gopts->uffd = -1; } - munmap_area((void **)&area_src); - munmap_area((void **)&area_src_alias); - munmap_area((void **)&area_dst); - munmap_area((void **)&area_dst_alias); - munmap_area((void **)&area_remap); + munmap_area(gopts, (void **)&gopts->area_src); + munmap_area(gopts, (void **)&gopts->area_src_alias); + munmap_area(gopts, (void **)&gopts->area_dst); + munmap_area(gopts, (void **)&gopts->area_dst_alias); + munmap_area(gopts, (void **)&gopts->area_remap); } -int uffd_test_ctx_init(uint64_t features, const char **errmsg) +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg) { unsigned long nr, cpu; int ret; + gopts->area_src_alias = NULL; + gopts->area_dst_alias = NULL; + gopts->area_remap = NULL; + if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { - ret = uffd_test_case_ops->pre_alloc(errmsg); + ret = uffd_test_case_ops->pre_alloc(gopts, errmsg); if (ret) return ret; } - ret = uffd_test_ops->allocate_area((void **)&area_src, true); - ret |= uffd_test_ops->allocate_area((void **)&area_dst, false); + ret = uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_src, true); + ret |= uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_dst, false); if (ret) { if (errmsg) *errmsg = "memory allocation failed"; @@ -315,26 +333,26 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) } if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { - ret = uffd_test_case_ops->post_alloc(errmsg); + ret = uffd_test_case_ops->post_alloc(gopts, errmsg); if (ret) return ret; } - ret = userfaultfd_open(&features); + ret = userfaultfd_open(gopts, &features); if (ret) { if (errmsg) *errmsg = "possible lack of privilege"; return ret; } - count_verify = malloc(nr_pages * sizeof(unsigned long long)); - if (!count_verify) + gopts->count_verify = malloc(gopts->nr_pages * sizeof(unsigned long long)); + if (!gopts->count_verify) err("count_verify"); - for (nr = 0; nr < nr_pages; nr++) { - *area_mutex(area_src, nr) = + for (nr = 0; nr < gopts->nr_pages; nr++) { + *area_mutex(gopts->area_src, nr, gopts) = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; - count_verify[nr] = *area_count(area_src, nr) = 1; + gopts->count_verify[nr] = *area_count(gopts->area_src, nr, gopts) = 1; /* * In the transition between 255 to 256, powerpc will * read out of order in my_bcmp and see both bytes as @@ -342,7 +360,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * after the count, to avoid my_bcmp to trigger false * positives. */ - *(area_count(area_src, nr) + 1) = 1; + *(area_count(gopts->area_src, nr, gopts) + 1) = 1; } /* @@ -363,13 +381,13 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * proactively split the thp and drop any accidentally initialized * pages within area_dst. */ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - pipefd = malloc(sizeof(int) * nr_parallel * 2); - if (!pipefd) + gopts->pipefd = malloc(sizeof(int) * gopts->nr_parallel * 2); + if (!gopts->pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_parallel; cpu++) - if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) + if (pipe2(&gopts->pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); return 0; @@ -416,9 +434,9 @@ static void continue_range(int ufd, __u64 start, __u64 len, bool wp) ret, (int64_t) req.mapped); } -int uffd_read_msg(int ufd, struct uffd_msg *msg) +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg) { - int ret = read(uffd, msg, sizeof(*msg)); + int ret = read(gopts->uffd, msg, sizeof(*msg)); if (ret != sizeof(*msg)) { if (ret < 0) { @@ -433,7 +451,8 @@ int uffd_read_msg(int ufd, struct uffd_msg *msg) return 0; } -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -442,7 +461,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { /* Write protect page faults */ - wp_range(uffd, msg->arg.pagefault.address, page_size, false); + wp_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, false); args->wp_faults++; } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { uint8_t *area; @@ -460,12 +479,12 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) * (UFFD-registered). */ - area = (uint8_t *)(area_dst + - ((char *)msg->arg.pagefault.address - - area_dst_alias)); - for (b = 0; b < page_size; ++b) + area = (uint8_t *)(gopts->area_dst + + ((char *)msg->arg.pagefault.address - + gopts->area_dst_alias)); + for (b = 0; b < gopts->page_size; ++b) area[b] = ~area[b]; - continue_range(uffd, msg->arg.pagefault.address, page_size, + continue_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, args->apply_wp); args->minor_faults++; } else { @@ -493,10 +512,10 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) err("unexpected write fault"); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); - if (copy_page(uffd, offset, args->apply_wp)) + if (copy_page(gopts, offset, args->apply_wp)) args->missing_faults++; } } @@ -504,6 +523,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) void *uffd_poll_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; unsigned long cpu = args->cpu; struct pollfd pollfd[2]; struct uffd_msg msg; @@ -514,12 +534,12 @@ void *uffd_poll_thread(void *arg) if (!args->handle_fault) args->handle_fault = uffd_handle_page_fault; - pollfd[0].fd = uffd; + pollfd[0].fd = gopts->uffd; pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].fd = gopts->pipefd[cpu*2]; pollfd[1].events = POLLIN; - ready_for_fork = true; + gopts->ready_for_fork = true; for (;;) { ret = poll(pollfd, 2, -1); @@ -537,30 +557,30 @@ void *uffd_poll_thread(void *arg) } if (!(pollfd[0].revents & POLLIN)) err("pollfd[0].revents %d", pollfd[0].revents); - if (uffd_read_msg(uffd, &msg)) + if (uffd_read_msg(gopts, &msg)) continue; switch (msg.event) { default: err("unexpected msg event %u\n", msg.event); break; case UFFD_EVENT_PAGEFAULT: - args->handle_fault(&msg, args); + args->handle_fault(gopts, &msg, args); break; case UFFD_EVENT_FORK: - close(uffd); - uffd = msg.arg.fork.ufd; - pollfd[0].fd = uffd; + close(gopts->uffd); + gopts->uffd = msg.arg.fork.ufd; + pollfd[0].fd = gopts->uffd; break; case UFFD_EVENT_REMOVE: uffd_reg.range.start = msg.arg.remove.start; uffd_reg.range.len = msg.arg.remove.end - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) + if (ioctl(gopts->uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) err("remove failure"); break; case UFFD_EVENT_REMAP: - area_remap = area_dst; /* save for later unmap */ - area_dst = (char *)(unsigned long)msg.arg.remap.to; + gopts->area_remap = gopts->area_dst; /* save for later unmap */ + gopts->area_dst = (char *)(unsigned long)msg.arg.remap.to; break; } } @@ -568,17 +588,18 @@ void *uffd_poll_thread(void *arg) return NULL; } -static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, +static void retry_copy_page(uffd_global_test_opts_t *gopts, struct uffdio_copy *uffdio_copy, unsigned long offset) { - uffd_test_ops->alias_mapping(&uffdio_copy->dst, + uffd_test_ops->alias_mapping(gopts, + &uffdio_copy->dst, uffdio_copy->len, offset); - if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy->copy != -EEXIST) err("UFFDIO_COPY retry error: %"PRId64, - (int64_t)uffdio_copy->copy); + (int64_t)uffdio_copy->copy); } else { err("UFFDIO_COPY retry unexpected: %"PRId64, (int64_t)uffdio_copy->copy); @@ -597,60 +618,60 @@ static void wake_range(int ufd, unsigned long addr, unsigned long len) addr), exit(1); } -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp) +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp) { struct uffdio_copy uffdio_copy; - if (offset >= nr_pages * page_size) + if (offset >= gopts->nr_pages * gopts->page_size) err("unexpected offset %lu\n", offset); - uffdio_copy.dst = (unsigned long) area_dst + offset; - uffdio_copy.src = (unsigned long) area_src + offset; - uffdio_copy.len = page_size; + uffdio_copy.dst = (unsigned long) gopts->area_dst + offset; + uffdio_copy.src = (unsigned long) gopts->area_src + offset; + uffdio_copy.len = gopts->page_size; if (wp) uffdio_copy.mode = UFFDIO_COPY_MODE_WP; else uffdio_copy.mode = 0; uffdio_copy.copy = 0; - if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy.copy != -EEXIST) err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); - wake_range(ufd, uffdio_copy.dst, page_size); - } else if (uffdio_copy.copy != page_size) { + wake_range(gopts->uffd, uffdio_copy.dst, gopts->page_size); + } else if (uffdio_copy.copy != gopts->page_size) { err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); } else { - if (test_uffdio_copy_eexist && retry) { - test_uffdio_copy_eexist = false; - retry_copy_page(ufd, &uffdio_copy, offset); + if (gopts->test_uffdio_copy_eexist && retry) { + gopts->test_uffdio_copy_eexist = false; + retry_copy_page(gopts, &uffdio_copy, offset); } return 1; } return 0; } -int copy_page(int ufd, unsigned long offset, bool wp) +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp) { - return __copy_page(ufd, offset, false, wp); + return __copy_page(gopts, offset, false, wp); } -int move_page(int ufd, unsigned long offset, unsigned long len) +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len) { struct uffdio_move uffdio_move; - if (offset + len > nr_pages * page_size) + if (offset + len > gopts->nr_pages * gopts->page_size) err("unexpected offset %lu and length %lu\n", offset, len); - uffdio_move.dst = (unsigned long) area_dst + offset; - uffdio_move.src = (unsigned long) area_src + offset; + uffdio_move.dst = (unsigned long) gopts->area_dst + offset; + uffdio_move.src = (unsigned long) gopts->area_src + offset; uffdio_move.len = len; uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; uffdio_move.move = 0; - if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { + if (ioctl(gopts->uffd, UFFDIO_MOVE, &uffdio_move)) { /* real retval in uffdio_move.move */ if (uffdio_move.move != -EEXIST) err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); - wake_range(ufd, uffdio_move.dst, len); + wake_range(gopts->uffd, uffdio_move.dst, len); } else if (uffdio_move.move != len) { err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); } else diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 7700cbfa3975..37d3ca55905f 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -56,20 +56,17 @@ #define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__) -/* pthread_mutex_t starts at page offset 0 */ -#define area_mutex(___area, ___nr) \ - ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) -/* - * count is placed in the page after pthread_mutex_t naturally aligned - * to avoid non alignment faults on non-x86 archs. - */ -#define area_count(___area, ___nr) \ - ((volatile unsigned long long *) ((unsigned long) \ - ((___area) + (___nr)*page_size + \ - sizeof(pthread_mutex_t) + \ - sizeof(unsigned long long) - 1) & \ - ~(unsigned long)(sizeof(unsigned long long) \ - - 1))) +struct uffd_global_test_opts { + unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; + char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; + int uffd, uffd_flags, finished, *pipefd, test_type; + bool map_shared; + bool test_uffdio_wp; + unsigned long long *count_verify; + volatile bool test_uffdio_copy_eexist; + atomic_bool ready_for_fork; +}; +typedef struct uffd_global_test_opts uffd_global_test_opts_t; /* Userfaultfd test statistics */ struct uffd_args { @@ -79,50 +76,55 @@ struct uffd_args { unsigned long missing_faults; unsigned long wp_faults; unsigned long minor_faults; + struct uffd_global_test_opts *gopts; /* A custom fault handler; defaults to uffd_handle_page_fault. */ - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args); + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, + struct uffd_args *args); }; struct uffd_test_ops { - int (*allocate_area)(void **alloc_area, bool is_src); - void (*release_pages)(char *rel_area); - void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); - void (*check_pmd_mapping)(void *p, int expect_nr_hpages); + int (*allocate_area)(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src); + void (*release_pages)(uffd_global_test_opts_t *gopts, char *rel_area); + void (*alias_mapping)(uffd_global_test_opts_t *gopts, + __u64 *start, + size_t len, + unsigned long offset); + void (*check_pmd_mapping)(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages); }; typedef struct uffd_test_ops uffd_test_ops_t; struct uffd_test_case_ops { - int (*pre_alloc)(const char **errmsg); - int (*post_alloc)(const char **errmsg); + int (*pre_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg); + int (*post_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg); }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -extern int uffd, uffd_flags, finished, *pipefd, test_type; -extern bool map_shared; -extern bool test_uffdio_wp; -extern unsigned long long *count_verify; -extern volatile bool test_uffdio_copy_eexist; -extern atomic_bool ready_for_fork; - +extern uffd_global_test_opts_t *uffd_gtest_opts; extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; extern uffd_test_ops_t hugetlb_uffd_test_ops; extern uffd_test_ops_t *uffd_test_ops; extern uffd_test_case_ops_t *uffd_test_case_ops; +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts); +volatile unsigned long long *area_count(char *area, + unsigned long nr, + uffd_global_test_opts_t *gopts); + void uffd_stats_report(struct uffd_args *args, int n_cpus); -int uffd_test_ctx_init(uint64_t features, const char **errmsg); -void uffd_test_ctx_clear(void); -int userfaultfd_open(uint64_t *features); -int uffd_read_msg(int ufd, struct uffd_msg *msg); +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg); +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts); +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features); +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg); void wp_range(int ufd, __u64 start, __u64 len, bool wp); -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args); -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp); -int copy_page(int ufd, unsigned long offset, bool wp); -int move_page(int ufd, unsigned long offset, unsigned long len); +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args); +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp); +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp); +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len); void *uffd_poll_thread(void *arg); int uffd_open_dev(unsigned int flags); diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 40af7f67c407..b51c89e1cd1a 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -44,6 +44,12 @@ uint64_t features; #define BOUNCE_VERIFY (1<<2) #define BOUNCE_POLL (1<<3) static int bounces; +/* defined globally for this particular test as the sigalrm handler + * depends on test_uffdio_*_eexist. + * XXX: define gopts in main() when we figure out a way to deal with + * test_uffdio_*_eexist. + */ +static uffd_global_test_opts_t *gopts; /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ #define ALARM_INTERVAL_SECS 10 @@ -51,7 +57,7 @@ static char *zeropage; pthread_attr_t attr; #define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + do { __auto_type __tmp = (a); (a) = (b); (b) = __tmp; } while (0) const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" @@ -76,54 +82,58 @@ static void usage(void) exit(1); } -static void uffd_stats_reset(struct uffd_args *args, unsigned long n_cpus) +static void uffd_stats_reset(uffd_global_test_opts_t *gopts, struct uffd_args *args, + unsigned long n_cpus) { int i; for (i = 0; i < n_cpus; i++) { args[i].cpu = i; - args[i].apply_wp = test_uffdio_wp; + args[i].apply_wp = gopts->test_uffdio_wp; args[i].missing_faults = 0; args[i].wp_faults = 0; args[i].minor_faults = 0; + args[i].gopts = gopts; } } static void *locking_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr; unsigned long long count; if (!(bounces & BOUNCE_RANDOM)) { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) - page_nr += cpu * nr_pages_per_cpu; + page_nr += cpu * gopts->nr_pages_per_cpu; } - while (!finished) { + while (!gopts->finished) { if (bounces & BOUNCE_RANDOM) { if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr)) err("getrandom failed"); } else page_nr += 1; - page_nr %= nr_pages; - pthread_mutex_lock(area_mutex(area_dst, page_nr)); - count = *area_count(area_dst, page_nr); - if (count != count_verify[page_nr]) + page_nr %= gopts->nr_pages; + pthread_mutex_lock(area_mutex(gopts->area_dst, page_nr, gopts)); + count = *area_count(gopts->area_dst, page_nr, gopts); + if (count != gopts->count_verify[page_nr]) err("page_nr %lu memory corruption %llu %llu", - page_nr, count, count_verify[page_nr]); + page_nr, count, gopts->count_verify[page_nr]); count++; - *area_count(area_dst, page_nr) = count_verify[page_nr] = count; - pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + *area_count(gopts->area_dst, page_nr, gopts) = gopts->count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(gopts->area_dst, page_nr, gopts)); } return NULL; } -static int copy_page_retry(int ufd, unsigned long offset) +static int copy_page_retry(uffd_global_test_opts_t *gopts, unsigned long offset) { - return __copy_page(ufd, offset, true, test_uffdio_wp); + return __copy_page(gopts, offset, true, gopts->test_uffdio_wp); } pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -131,15 +141,16 @@ pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; static void *uffd_read_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; struct uffd_msg msg; pthread_mutex_unlock(&uffd_read_mutex); /* from here cancellation is ok */ for (;;) { - if (uffd_read_msg(uffd, &msg)) + if (uffd_read_msg(gopts, &msg)) continue; - uffd_handle_page_fault(&msg, args); + uffd_handle_page_fault(gopts, &msg, args); } return NULL; @@ -147,32 +158,34 @@ static void *uffd_read_thread(void *arg) static void *background_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr, start_nr, mid_nr, end_nr; - start_nr = cpu * nr_pages_per_cpu; - end_nr = (cpu+1) * nr_pages_per_cpu; + start_nr = cpu * gopts->nr_pages_per_cpu; + end_nr = (cpu+1) * gopts->nr_pages_per_cpu; mid_nr = (start_nr + end_nr) / 2; /* Copy the first half of the pages */ for (page_nr = start_nr; page_nr < mid_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); /* * If we need to test uffd-wp, set it up now. Then we'll have * at least the first half of the pages mapped already which * can be write-protected for testing */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst + start_nr * page_size, - nr_pages_per_cpu * page_size, true); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst + start_nr * gopts->page_size, + gopts->nr_pages_per_cpu * gopts->page_size, true); /* * Continue the 2nd half of the page copying, handling write * protection faults if any */ for (page_nr = mid_nr; page_nr < end_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); return NULL; } @@ -180,17 +193,21 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_parallel]; - pthread_t uffd_threads[nr_parallel]; - pthread_t background_threads[nr_parallel]; + uffd_global_test_opts_t *gopts = args->gopts; + pthread_t locking_threads[gopts->nr_parallel]; + pthread_t uffd_threads[gopts->nr_parallel]; + pthread_t background_threads[gopts->nr_parallel]; - finished = 0; - for (cpu = 0; cpu < nr_parallel; cpu++) { + gopts->finished = 0; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, - locking_thread, (void *)cpu)) + locking_thread, (void *)&args[cpu])) return 1; if (bounces & BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu])) + if (pthread_create(&uffd_threads[cpu], + &attr, + uffd_poll_thread, + (void *) &args[cpu])) err("uffd_poll_thread create"); } else { if (pthread_create(&uffd_threads[cpu], &attr, @@ -200,10 +217,10 @@ static int stress(struct uffd_args *args) pthread_mutex_lock(&uffd_read_mutex); } if (pthread_create(&background_threads[cpu], &attr, - background_thread, (void *)cpu)) + background_thread, (void *)&args[cpu])) return 1; } - for (cpu = 0; cpu < nr_parallel; cpu++) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -216,17 +233,17 @@ static int stress(struct uffd_args *args) * UFFDIO_COPY without writing zero pages into area_dst * because the background threads already completed). */ - uffd_test_ops->release_pages(area_src); + uffd_test_ops->release_pages(gopts, gopts->area_src); - finished = 1; - for (cpu = 0; cpu < nr_parallel; cpu++) + gopts->finished = 1; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_parallel; cpu++) { + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { - if (write(pipefd[cpu*2+1], &c, 1) != 1) + if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1) err("pipefd write error"); if (pthread_join(uffd_threads[cpu], (void *)&args[cpu])) @@ -242,26 +259,26 @@ static int stress(struct uffd_args *args) return 0; } -static int userfaultfd_stress(void) +static int userfaultfd_stress(uffd_global_test_opts_t *gopts) { void *area; unsigned long nr; - struct uffd_args args[nr_parallel]; - uint64_t mem_size = nr_pages * page_size; + struct uffd_args args[gopts->nr_parallel]; + uint64_t mem_size = gopts->nr_pages * gopts->page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_parallel); + memset(args, 0, sizeof(struct uffd_args) * gopts->nr_parallel); - if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) + if (features & UFFD_FEATURE_WP_UNPOPULATED && gopts->test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; - if (uffd_test_ctx_init(flags, NULL)) + if (uffd_test_ctx_init(gopts, flags, NULL)) err("context init failed"); - if (posix_memalign(&area, page_size, page_size)) + if (posix_memalign(&area, gopts->page_size, gopts->page_size)) err("out of memory"); zeropage = area; - bzero(zeropage, page_size); + bzero(zeropage, gopts->page_size); pthread_mutex_lock(&uffd_read_mutex); @@ -284,18 +301,18 @@ static int userfaultfd_stress(void) fflush(stdout); if (bounces & BOUNCE_POLL) - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); else - fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags & ~O_NONBLOCK); /* register */ - if (uffd_register(uffd, area_dst, mem_size, - true, test_uffdio_wp, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure"); - if (area_dst_alias) { - if (uffd_register(uffd, area_dst_alias, mem_size, - true, test_uffdio_wp, false)) + if (gopts->area_dst_alias) { + if (uffd_register(gopts->uffd, gopts->area_dst_alias, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure alias"); } @@ -323,87 +340,88 @@ static int userfaultfd_stress(void) * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's * required to MADV_DONTNEED here. */ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - uffd_stats_reset(args, nr_parallel); + uffd_stats_reset(gopts, args, gopts->nr_parallel); /* bounce pass */ if (stress(args)) { - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 1; } /* Clear all the write protections if there is any */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst, - nr_pages * page_size, false); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst, + gopts->nr_pages * gopts->page_size, false); /* unregister */ - if (uffd_unregister(uffd, area_dst, mem_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, mem_size)) err("unregister failure"); - if (area_dst_alias) { - if (uffd_unregister(uffd, area_dst_alias, mem_size)) + if (gopts->area_dst_alias) { + if (uffd_unregister(gopts->uffd, gopts->area_dst_alias, mem_size)) err("unregister failure alias"); } /* verification */ if (bounces & BOUNCE_VERIFY) - for (nr = 0; nr < nr_pages; nr++) - if (*area_count(area_dst, nr) != count_verify[nr]) + for (nr = 0; nr < gopts->nr_pages; nr++) + if (*area_count(gopts->area_dst, nr, gopts) != + gopts->count_verify[nr]) err("error area_count %llu %llu %lu\n", - *area_count(area_src, nr), - count_verify[nr], nr); + *area_count(gopts->area_src, nr, gopts), + gopts->count_verify[nr], nr); /* prepare next bounce */ - swap(area_src, area_dst); + swap(gopts->area_src, gopts->area_dst); - swap(area_src_alias, area_dst_alias); + swap(gopts->area_src_alias, gopts->area_dst_alias); - uffd_stats_report(args, nr_parallel); + uffd_stats_report(args, gopts->nr_parallel); } - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 0; } -static void set_test_type(const char *type) +static void set_test_type(uffd_global_test_opts_t *gopts, const char *type) { if (!strcmp(type, "anon")) { - test_type = TEST_ANON; + gopts->test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; } else if (!strcmp(type, "hugetlb")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; - map_shared = true; + gopts->map_shared = true; } else if (!strcmp(type, "hugetlb-private")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; } else if (!strcmp(type, "shmem")) { - map_shared = true; - test_type = TEST_SHMEM; + gopts->map_shared = true; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else if (!strcmp(type, "shmem-private")) { - test_type = TEST_SHMEM; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } } -static void parse_test_type_arg(const char *raw_type) +static void parse_test_type_arg(uffd_global_test_opts_t *gopts, const char *raw_type) { - set_test_type(raw_type); + set_test_type(gopts, raw_type); - if (!test_type) + if (!gopts->test_type) err("failed to parse test type argument: '%s'", raw_type); - if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); + if (gopts->test_type == TEST_HUGETLB) + gopts->page_size = default_huge_page_size(); else - page_size = sysconf(_SC_PAGE_SIZE); + gopts->page_size = sysconf(_SC_PAGE_SIZE); - if (!page_size) + if (!gopts->page_size) err("Unable to determine page size"); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) + if ((unsigned long) area_count(NULL, 0, gopts) + sizeof(unsigned long long) * 2 + > gopts->page_size) err("Impossible to run this test"); /* @@ -415,21 +433,21 @@ static void parse_test_type_arg(const char *raw_type) if (uffd_get_features(&features) && errno == ENOENT) ksft_exit_skip("failed to get available features (%d)\n", errno); - test_uffdio_wp = test_uffdio_wp && + gopts->test_uffdio_wp = gopts->test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); - if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) - test_uffdio_wp = false; + if (gopts->test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + gopts->test_uffdio_wp = false; - close(uffd); - uffd = -1; + close(gopts->uffd); + gopts->uffd = -1; } static void sigalrm(int sig) { if (sig != SIGALRM) abort(); - test_uffdio_copy_eexist = true; + gopts->test_uffdio_copy_eexist = true; alarm(ALARM_INTERVAL_SECS); } @@ -438,6 +456,8 @@ int main(int argc, char **argv) unsigned long nr_cpus; size_t bytes; + gopts = (uffd_global_test_opts_t *) malloc(sizeof(uffd_global_test_opts_t)); + if (argc < 4) usage(); @@ -445,29 +465,34 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); - parse_test_type_arg(argv[1]); + parse_test_type_arg(gopts, argv[1]); bytes = atol(argv[2]) * 1024 * 1024; - if (test_type == TEST_HUGETLB && - get_free_hugepages() < bytes / page_size) { - printf("skip: Skipping userfaultfd... not enough hugepages\n"); - return KSFT_SKIP; - } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus > 32) { /* Don't let calculation below go to zero. */ ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", nr_cpus); - nr_parallel = 32; + gopts->nr_parallel = 32; } else { - nr_parallel = nr_cpus; + gopts->nr_parallel = nr_cpus; + } + + /* + * src and dst each require bytes / page_size number of hugepages. + * Ensure nr_parallel - 1 hugepages on top of that to account + * for racy extra reservation of hugepages. + */ + if (gopts->test_type == TEST_HUGETLB && + get_free_hugepages() < 2 * (bytes / gopts->page_size) + gopts->nr_parallel - 1) { + printf("skip: Skipping userfaultfd... not enough hugepages\n"); + return KSFT_SKIP; } - nr_pages_per_cpu = bytes / page_size / nr_parallel; - if (!nr_pages_per_cpu) { + gopts->nr_pages_per_cpu = bytes / gopts->page_size / gopts->nr_parallel; + if (!gopts->nr_pages_per_cpu) { _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", - bytes, page_size, nr_parallel); + bytes, gopts->page_size, gopts->nr_parallel); usage(); } @@ -476,11 +501,11 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_parallel; + gopts->nr_pages = gopts->nr_pages_per_cpu * gopts->nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", - nr_pages, nr_pages_per_cpu); - return userfaultfd_stress(); + gopts->nr_pages, gopts->nr_pages_per_cpu); + return userfaultfd_stress(gopts); } #else /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 50501b38e34e..9e3be2ee7f1b 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -76,7 +76,7 @@ struct uffd_test_args { typedef struct uffd_test_args uffd_test_args_t; /* Returns: UFFD_TEST_* */ -typedef void (*uffd_test_fn)(uffd_test_args_t *); +typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *); typedef struct { const char *name; @@ -181,33 +181,6 @@ out: return 1; } -/* - * This function initializes the global variables. TODO: remove global - * vars and then remove this. - */ -static int -uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, - mem_type_t *mem_type, const char **errmsg) -{ - map_shared = mem_type->shared; - uffd_test_ops = mem_type->mem_ops; - uffd_test_case_ops = test->test_case_ops; - - if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) - page_size = default_huge_page_size(); - else - page_size = psize(); - - /* Ensure we have at least 2 pages */ - nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; - /* TODO: remove this global var.. it's so ugly */ - nr_parallel = 1; - - /* Initialize test arguments */ - args->mem_type = mem_type; - - return uffd_test_ctx_init(test->uffd_feature_required, errmsg); -} static bool uffd_feature_supported(uffd_test_case_t *test) { @@ -237,7 +210,8 @@ static int pagemap_open(void) } while (0) typedef struct { - int parent_uffd, child_uffd; + uffd_global_test_opts_t *gopts; + int child_uffd; } fork_event_args; static void *fork_event_consumer(void *data) @@ -245,10 +219,10 @@ static void *fork_event_consumer(void *data) fork_event_args *args = data; struct uffd_msg msg = { 0 }; - ready_for_fork = true; + args->gopts->ready_for_fork = true; /* Read until a full msg received */ - while (uffd_read_msg(args->parent_uffd, &msg)); + while (uffd_read_msg(args->gopts, &msg)); if (msg.event != UFFD_EVENT_FORK) err("wrong message: %u\n", msg.event); @@ -304,9 +278,9 @@ static void unpin_pages(pin_args *args) args->pinned = false; } -static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) +static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin) { - fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 }; + fork_event_args args = { .gopts = gopts, .child_uffd = -1 }; pthread_t thread; pid_t child; uint64_t value; @@ -314,10 +288,10 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { - ready_for_fork = false; + gopts->ready_for_fork = false; if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ } @@ -328,14 +302,14 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) fd = pagemap_open(); - if (test_pin && pin_pages(&args, area_dst, page_size)) + if (test_pin && pin_pages(&args, gopts->area_dst, gopts->page_size)) /* * Normally when reach here we have pinned in * previous tests, so shouldn't fail anymore */ err("pin page failed in child"); - value = pagemap_get_entry(fd, area_dst); + value = pagemap_get_entry(fd, gopts->area_dst); /* * After fork(), we should handle uffd-wp bit differently: * @@ -361,70 +335,70 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) return result; } -static void uffd_wp_unpopulated_test(uffd_test_args_t *args) +static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { uint64_t value; int pagemap_fd; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Test applying pte marker to anon unpopulated */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); /* Test unprotect on anon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test zap on anon marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - if (madvise(area_dst, page_size, MADV_DONTNEED)) + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test fault in after marker removed */ - *area_dst = 1; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); /* Test read-zero-page upon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - *(volatile char *)area_dst; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + *(volatile char *)gopts->area_dst; /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); uffd_test_pass(); } -static void uffd_wp_fork_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args, bool with_event) { int pagemap_fd; uint64_t value; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in present pte", with_event ? "missing" : "stall"); goto out; @@ -442,79 +416,80 @@ static void uffd_wp_fork_test_common(uffd_test_args_t *args, * to expose pte markers. */ if (args->mem_type->shared) { - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("MADV_DONTNEED"); } else { /* * NOTE: ignore retval because private-hugetlb doesn't yet * support swapping, so it could fail. */ - madvise(area_dst, page_size, MADV_PAGEOUT); + madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT); } /* Uffd-wp should persist even swapped out */ - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte", with_event ? "missing" : "stall"); goto out; } /* Unprotect; this tests swap pte modifications */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Fault in the page from disk */ - *area_dst = 2; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 2; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, nr_pages * page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size)) err("unregister failed"); close(pagemap_fd); } -static void uffd_wp_fork_test(uffd_test_args_t *args) +static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, false); + uffd_wp_fork_test_common(gopts, args, false); } -static void uffd_wp_fork_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, true); + uffd_wp_fork_test_common(gopts, args, true); } -static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, bool with_event) { int pagemap_fd; pin_args pin_args = {}; - if (uffd_register(uffd, area_dst, page_size, false, true, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); /* * 1. First pin, then fork(). This tests fork() special path when * doing early CoW if the page is private. */ - if (pin_pages(&pin_args, area_dst, page_size)) { + if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) { uffd_test_skip("Possibly CONFIG_GUP_TEST missing " "or unprivileged"); close(pagemap_fd); - uffd_unregister(uffd, area_dst, page_size); + uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size); return; } - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()", with_event ? "missing" : "stall"); unpin_pages(&pin_args); @@ -527,49 +502,50 @@ static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, * 2. First fork(), then pin (in the child, where test_pin==true). * This tests COR, aka, page unsharing on private memories. */ - if (pagemap_test_fork(uffd, with_event, true)) { + if (pagemap_test_fork(gopts, with_event, true)) { uffd_test_fail("Detected %s uffd-wp bit when RO pin", with_event ? "missing" : "stall"); goto out; } uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("register failed"); close(pagemap_fd); } -static void uffd_wp_fork_pin_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, false); + uffd_wp_fork_pin_test_common(gopts, args, false); } -static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, true); + uffd_wp_fork_pin_test_common(gopts, args, true); } -static void check_memory_contents(char *p) +static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p) { unsigned long i, j; uint8_t expected_byte; - for (i = 0; i < nr_pages; ++i) { + for (i = 0; i < gopts->nr_pages; ++i) { expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); - for (j = 0; j < page_size; j++) { - uint8_t v = *(uint8_t *)(p + (i * page_size) + j); + for (j = 0; j < gopts->page_size; j++) { + uint8_t v = *(uint8_t *)(p + (i * gopts->page_size) + j); if (v != expected_byte) err("unexpected page contents"); } } } -static void uffd_minor_test_common(bool test_collapse, bool test_wp) +static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp) { unsigned long p; pthread_t uffd_mon; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; /* * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing @@ -577,7 +553,7 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) */ assert(!(test_collapse && test_wp)); - if (uffd_register(uffd, area_dst_alias, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, /* NOTE! MADV_COLLAPSE may not work with uffd-wp */ false, test_wp, true)) err("register failure"); @@ -586,9 +562,9 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * After registering with UFFD, populate the non-UFFD-registered side of * the shared mapping. This should *not* trigger any UFFD minor faults. */ - for (p = 0; p < nr_pages; ++p) - memset(area_dst + (p * page_size), p % ((uint8_t)-1), - page_size); + for (p = 0; p < gopts->nr_pages; ++p) + memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1), + gopts->page_size); args.apply_wp = test_wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -600,50 +576,51 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); if (test_collapse) { - if (madvise(area_dst_alias, nr_pages * page_size, + if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, MADV_COLLAPSE)) { /* It's fine to fail for this one... */ uffd_test_skip("MADV_COLLAPSE failed"); return; } - uffd_test_ops->check_pmd_mapping(area_dst, - nr_pages * page_size / + uffd_test_ops->check_pmd_mapping(gopts, + gopts->area_dst, + gopts->nr_pages * gopts->page_size / read_pmd_pagesize()); /* * This won't cause uffd-fault - it purely just makes sure there * was no corruption. */ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); } - if (args.missing_faults != 0 || args.minor_faults != nr_pages) + if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages) uffd_test_fail("stats check error"); else uffd_test_pass(); } -void uffd_minor_test(uffd_test_args_t *args) +void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(false, false); + uffd_minor_test_common(gopts, false, false); } -void uffd_minor_wp_test(uffd_test_args_t *args) +void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(false, true); + uffd_minor_test_common(gopts, false, true); } -void uffd_minor_collapse_test(uffd_test_args_t *args) +void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(true, false); + uffd_minor_test_common(gopts, true, false); } static sigjmp_buf jbuf, *sigbuf; @@ -678,7 +655,7 @@ static void sighndl(int sig, siginfo_t *siginfo, void *ptr) * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal * feature. Using monitor thread, verify no userfault events are generated. */ -static int faulting_process(int signal_test, bool wp) +static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp) { unsigned long nr, i; unsigned long long count; @@ -687,7 +664,7 @@ static int faulting_process(int signal_test, bool wp) struct sigaction act; volatile unsigned long signalled = 0; - split_nr_pages = (nr_pages + 1) / 2; + split_nr_pages = (gopts->nr_pages + 1) / 2; if (signal_test) { sigbuf = &jbuf; @@ -701,7 +678,7 @@ static int faulting_process(int signal_test, bool wp) for (nr = 0; nr < split_nr_pages; nr++) { volatile int steps = 1; - unsigned long offset = nr * page_size; + unsigned long offset = nr * gopts->page_size; if (signal_test) { if (sigsetjmp(*sigbuf, 1) != 0) { @@ -713,15 +690,15 @@ static int faulting_process(int signal_test, bool wp) if (steps == 1) { /* This is a MISSING request */ steps++; - if (copy_page(uffd, offset, wp)) + if (copy_page(gopts, offset, wp)) signalled++; } else { /* This is a WP request */ assert(steps == 2); - wp_range(uffd, - (__u64)area_dst + + wp_range(gopts->uffd, + (__u64)gopts->area_dst + offset, - page_size, false); + gopts->page_size, false); } } else { signalled++; @@ -730,51 +707,53 @@ static int faulting_process(int signal_test, bool wp) } } - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); /* * Trigger write protection if there is by writing * the same value back. */ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } if (signal_test) return signalled != split_nr_pages; - area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, - MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) + gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size, + gopts->nr_pages * gopts->page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, + gopts->area_src); + if (gopts->area_dst == MAP_FAILED) err("mremap"); /* Reset area_src since we just clobbered it */ - area_src = NULL; + gopts->area_src = NULL; - for (; nr < nr_pages; nr++) { - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) { + for (; nr < gopts->nr_pages; nr++) { + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) { err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); } /* * Trigger write protection if there is by writing * the same value back. */ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - for (nr = 0; nr < nr_pages; nr++) - for (i = 0; i < page_size; i++) - if (*(area_dst + nr * page_size + i) != 0) + for (nr = 0; nr < gopts->nr_pages; nr++) + for (i = 0; i < gopts->page_size; i++) + if (*(gopts->area_dst + nr * gopts->page_size + i) != 0) err("page %lu offset %lu is not zero", nr, i); return 0; } -static void uffd_sigbus_test_common(bool wp) +static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp) { unsigned long userfaults; pthread_t uffd_mon; @@ -782,25 +761,26 @@ static void uffd_sigbus_test_common(bool wp) int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); - if (faulting_process(1, wp)) + if (faulting_process(gopts, 1, wp)) err("faulting process failed"); - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); args.apply_wp = wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -808,12 +788,12 @@ static void uffd_sigbus_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(2, wp)); + exit(faulting_process(gopts, 2, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, (void **)&userfaults)) err("pthread_join()"); @@ -824,28 +804,29 @@ static void uffd_sigbus_test_common(bool wp) uffd_test_pass(); } -static void uffd_sigbus_test(uffd_test_args_t *args) +static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_sigbus_test_common(false); + uffd_sigbus_test_common(gopts, false); } -static void uffd_sigbus_wp_test(uffd_test_args_t *args) +static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_sigbus_test_common(true); + uffd_sigbus_test_common(gopts, true); } -static void uffd_events_test_common(bool wp) +static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp) { pthread_t uffd_mon; pid_t pid; int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); @@ -853,7 +834,7 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -861,39 +842,39 @@ static void uffd_events_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(0, wp)); + exit(faulting_process(gopts, 0, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); - if (args.missing_faults != nr_pages) + if (args.missing_faults != gopts->nr_pages) uffd_test_fail("Fault counts wrong"); else uffd_test_pass(); } -static void uffd_events_test(uffd_test_args_t *args) +static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_events_test_common(false); + uffd_events_test_common(gopts, false); } -static void uffd_events_wp_test(uffd_test_args_t *args) +static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_events_test_common(true); + uffd_events_test_common(gopts, true); } -static void retry_uffdio_zeropage(int ufd, +static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts, struct uffdio_zeropage *uffdio_zeropage) { - uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, + uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start, uffdio_zeropage->range.len, 0); - if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { + if (ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { if (uffdio_zeropage->zeropage != -EEXIST) err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)uffdio_zeropage->zeropage); @@ -903,16 +884,16 @@ static void retry_uffdio_zeropage(int ufd, } } -static bool do_uffdio_zeropage(int ufd, bool has_zeropage) +static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage) { struct uffdio_zeropage uffdio_zeropage = { 0 }; int ret; __s64 res; - uffdio_zeropage.range.start = (unsigned long) area_dst; - uffdio_zeropage.range.len = page_size; + uffdio_zeropage.range.start = (unsigned long) gopts->area_dst; + uffdio_zeropage.range.len = gopts->page_size; uffdio_zeropage.mode = 0; - ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); + ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &uffdio_zeropage); res = uffdio_zeropage.zeropage; if (ret) { /* real retval in ufdio_zeropage.zeropage */ @@ -921,10 +902,10 @@ static bool do_uffdio_zeropage(int ufd, bool has_zeropage) else if (res != -EINVAL) err("UFFDIO_ZEROPAGE not -EINVAL"); } else if (has_zeropage) { - if (res != page_size) + if (res != gopts->page_size) err("UFFDIO_ZEROPAGE unexpected size"); else - retry_uffdio_zeropage(ufd, &uffdio_zeropage); + retry_uffdio_zeropage(gopts, &uffdio_zeropage); return true; } else err("UFFDIO_ZEROPAGE succeeded"); @@ -950,25 +931,29 @@ uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len) } /* exercise UFFDIO_ZEROPAGE */ -static void uffd_zeropage_test(uffd_test_args_t *args) +static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { bool has_zeropage; int i; - has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size); - if (area_dst_alias) + has_zeropage = uffd_register_detect_zeropage(gopts->uffd, + gopts->area_dst, + gopts->page_size); + if (gopts->area_dst_alias) /* Ignore the retval; we already have it */ - uffd_register_detect_zeropage(uffd, area_dst_alias, page_size); + uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size); - if (do_uffdio_zeropage(uffd, has_zeropage)) - for (i = 0; i < page_size; i++) - if (area_dst[i] != 0) + if (do_uffdio_zeropage(gopts, has_zeropage)) + for (i = 0; i < gopts->page_size; i++) + if (gopts->area_dst[i] != 0) err("data non-zero at offset %d\n", i); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); - if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size)) + if (gopts->area_dst_alias && uffd_unregister(gopts->uffd, + gopts->area_dst_alias, + gopts->page_size)) err("unregister"); uffd_test_pass(); @@ -987,26 +972,27 @@ static void uffd_register_poison(int uffd, void *addr, uint64_t len) err("registered area doesn't support COPY and POISON ioctls"); } -static void do_uffdio_poison(int uffd, unsigned long offset) +static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset) { struct uffdio_poison uffdio_poison = { 0 }; int ret; __s64 res; - uffdio_poison.range.start = (unsigned long) area_dst + offset; - uffdio_poison.range.len = page_size; + uffdio_poison.range.start = (unsigned long) gopts->area_dst + offset; + uffdio_poison.range.len = gopts->page_size; uffdio_poison.mode = 0; - ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); + ret = ioctl(gopts->uffd, UFFDIO_POISON, &uffdio_poison); res = uffdio_poison.updated; if (ret) err("UFFDIO_POISON error: %"PRId64, (int64_t)res); - else if (res != page_size) + else if (res != gopts->page_size) err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); } -static void uffd_poison_handle_fault( - struct uffd_msg *msg, struct uffd_args *args) +static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -1017,20 +1003,20 @@ static void uffd_poison_handle_fault( (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); /* Odd pages -> copy zeroed page; even pages -> poison. */ - if (offset & page_size) - copy_page(uffd, offset, false); + if (offset & gopts->page_size) + copy_page(gopts, offset, false); else - do_uffdio_poison(uffd, offset); + do_uffdio_poison(gopts, offset); } /* Make sure to cover odd/even, and minimum duplications */ #define UFFD_POISON_TEST_NPAGES 4 -static void uffd_poison_test(uffd_test_args_t *targs) +static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { pthread_t uffd_mon; char c; @@ -1039,15 +1025,17 @@ static void uffd_poison_test(uffd_test_args_t *targs) unsigned long nr_sigbus = 0; unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; - if (nr_pages < poison_pages) { - uffd_test_skip("Too few pages for POISON test"); + if (gopts->nr_pages < poison_pages) { + uffd_test_skip("Too less pages for POISON test"); return; } - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + args.gopts = gopts; + + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - uffd_register_poison(uffd, area_dst, poison_pages * page_size); - memset(area_src, 0, poison_pages * page_size); + uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size); + memset(gopts->area_src, 0, poison_pages * gopts->page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -1060,8 +1048,8 @@ static void uffd_poison_test(uffd_test_args_t *targs) err("sigaction"); for (nr = 0; nr < poison_pages; ++nr) { - unsigned long offset = nr * page_size; - const char *bytes = (const char *) area_dst + offset; + unsigned long offset = nr * gopts->page_size; + const char *bytes = (const char *) gopts->area_dst + offset; const char *i; if (sigsetjmp(*sigbuf, 1)) { @@ -1074,14 +1062,14 @@ static void uffd_poison_test(uffd_test_args_t *targs) continue; } - for (i = bytes; i < bytes + page_size; ++i) { + for (i = bytes; i < bytes + gopts->page_size; ++i) { if (*i) err("nonzero byte in area_dst (%p) at %p: %u", - area_dst, i, *i); + gopts->area_dst, i, *i); } } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); @@ -1094,7 +1082,9 @@ static void uffd_poison_test(uffd_test_args_t *targs) } static void -uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, +uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args, unsigned long len) { unsigned long offset; @@ -1106,28 +1096,32 @@ uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; offset &= ~(len-1); - if (move_page(uffd, offset, len)) + if (move_page(gopts, offset, len)) args->missing_faults++; } -static void uffd_move_handle_fault(struct uffd_msg *msg, +static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, page_size); + uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size); } -static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, +static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); + uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize()); } static void -uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) +uffd_move_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t *targs, + unsigned long chunk_size, + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, struct uffd_args *args) +) { unsigned long nr; pthread_t uffd_mon; @@ -1139,11 +1133,13 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, unsigned long src_offs = 0; unsigned long dst_offs = 0; + args.gopts = gopts; + /* Prevent source pages from being mapped more than once */ - if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK)) err("madvise(MADV_DONTFORK) failure"); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("register failure"); @@ -1151,22 +1147,22 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - step_size = chunk_size / page_size; - step_count = nr_pages / step_size; + step_size = chunk_size / gopts->page_size; + step_count = gopts->nr_pages / step_size; - if (chunk_size > page_size) { - char *aligned_src = ALIGN_UP(area_src, chunk_size); - char *aligned_dst = ALIGN_UP(area_dst, chunk_size); + if (chunk_size > gopts->page_size) { + char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size); + char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size); - if (aligned_src != area_src || aligned_dst != area_dst) { - src_offs = (aligned_src - area_src) / page_size; - dst_offs = (aligned_dst - area_dst) / page_size; + if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) { + src_offs = (aligned_src - gopts->area_src) / gopts->page_size; + dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size; step_count--; } - orig_area_src = area_src; - orig_area_dst = area_dst; - area_src = aligned_src; - area_dst = aligned_dst; + orig_area_src = gopts->area_src; + orig_area_dst = gopts->area_dst; + gopts->area_src = aligned_src; + gopts->area_dst = aligned_dst; } /* @@ -1180,34 +1176,34 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, /* Check area_src content */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); - if (count != count_verify[src_offs + nr + i]) + count = *area_count(gopts->area_src, nr + i, gopts); + if (count != gopts->count_verify[src_offs + nr + i]) err("nr %lu source memory invalid %llu %llu\n", - nr + i, count, count_verify[src_offs + nr + i]); + nr + i, count, gopts->count_verify[src_offs + nr + i]); } /* Faulting into area_dst should move the page or the huge page */ for (i = 0; i < step_size; i++) { - count = *area_count(area_dst, nr + i); - if (count != count_verify[dst_offs + nr + i]) + count = *area_count(gopts->area_dst, nr + i, gopts); + if (count != gopts->count_verify[dst_offs + nr + i]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[dst_offs + nr + i]); + nr, count, gopts->count_verify[dst_offs + nr + i]); } /* Re-check area_src content which should be empty */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); + count = *area_count(gopts->area_src, nr + i, gopts); if (count != 0) err("nr %lu move failed %llu %llu\n", - nr, count, count_verify[src_offs + nr + i]); + nr, count, gopts->count_verify[src_offs + nr + i]); } } - if (chunk_size > page_size) { - area_src = orig_area_src; - area_dst = orig_area_dst; + if (chunk_size > gopts->page_size) { + gopts->area_src = orig_area_src; + gopts->area_dst = orig_area_dst; } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); @@ -1218,24 +1214,24 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, uffd_test_pass(); } -static void uffd_move_test(uffd_test_args_t *targs) +static void uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - uffd_move_test_common(targs, page_size, uffd_move_handle_fault); + uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault); } -static void uffd_move_pmd_test(uffd_test_args_t *targs) +static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) err("madvise(MADV_HUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } -static void uffd_move_pmd_split_test(uffd_test_args_t *targs) +static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) err("madvise(MADV_NOHUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } @@ -1295,6 +1291,11 @@ typedef enum { THR_STATE_UNINTERRUPTIBLE, } thread_state; +typedef struct { + uffd_global_test_opts_t *gopts; + volatile pid_t *pid; +} mmap_changing_thread_args; + static void sleep_short(void) { usleep(1000); @@ -1337,7 +1338,9 @@ static void thread_state_until(pid_t tid, thread_state state) static void *uffd_mmap_changing_thread(void *opaque) { - volatile pid_t *pid = opaque; + mmap_changing_thread_args *args = opaque; + uffd_global_test_opts_t *gopts = args->gopts; + volatile pid_t *pid = args->pid; int ret; /* Unfortunately, it's only fetch-able from the thread itself.. */ @@ -1345,21 +1348,21 @@ static void *uffd_mmap_changing_thread(void *opaque) *pid = syscall(SYS_gettid); /* Inject an event, this will hang solid until the event read */ - ret = madvise(area_dst, page_size, MADV_REMOVE); + ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE); if (ret) err("madvise(MADV_REMOVE) failed"); return NULL; } -static void uffd_consume_message(int fd) +static void uffd_consume_message(uffd_global_test_opts_t *gopts) { struct uffd_msg msg = { 0 }; - while (uffd_read_msg(fd, &msg)); + while (uffd_read_msg(gopts, &msg)); } -static void uffd_mmap_changing_test(uffd_test_args_t *targs) +static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { /* * This stores the real PID (which can be different from how tid is @@ -1368,13 +1371,14 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) pid_t pid = 0; pthread_t tid; int ret; + mmap_changing_thread_args args = { gopts, &pid }; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("uffd_register() failed"); /* Create a thread to generate the racy event */ - ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); + ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args); if (ret) err("pthread_create() failed"); @@ -1388,26 +1392,26 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) /* Wait until the thread hangs at REMOVE event */ thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); - if (!uffdio_mmap_changing_test_copy(uffd)) + if (!uffdio_mmap_changing_test_copy(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_zeropage(uffd)) + if (!uffdio_mmap_changing_test_zeropage(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_move(uffd)) + if (!uffdio_mmap_changing_test_move(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_poison(uffd)) + if (!uffdio_mmap_changing_test_poison(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_continue(uffd)) + if (!uffdio_mmap_changing_test_continue(gopts->uffd)) return; /* * All succeeded above! Recycle everything. Start by reading the * event so as to kick the thread roll again.. */ - uffd_consume_message(uffd); + uffd_consume_message(gopts); ret = pthread_join(tid, NULL); assert(ret == 0); @@ -1415,10 +1419,10 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) uffd_test_pass(); } -static int prevent_hugepages(const char **errmsg) +static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) { /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ if (errno != EINVAL) { if (errmsg) @@ -1429,10 +1433,10 @@ static int prevent_hugepages(const char **errmsg) return 0; } -static int request_hugepages(const char **errmsg) +static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) { if (errmsg) { *errmsg = (errno == EINVAL) ? "CONFIG_TRANSPARENT_HUGEPAGE is not set" : @@ -1456,13 +1460,17 @@ struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. */ static void -do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) +do_register_ioctls_test(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, + bool miss, + bool wp, + bool minor) { uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE); mem_type_t *mem_type = args->mem_type; int ret; - ret = uffd_register_with_ioctls(uffd, area_dst, page_size, + ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size, miss, wp, minor, &ioctls); /* @@ -1493,18 +1501,18 @@ do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", " "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); } -static void uffd_register_ioctls_test(uffd_test_args_t *args) +static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { int miss, wp, minor; for (miss = 0; miss <= 1; miss++) for (wp = 0; wp <= 1; wp++) for (minor = 0; minor <= 1; minor++) - do_register_ioctls_test(args, miss, wp, minor); + do_register_ioctls_test(gopts, args, miss, wp, minor); uffd_test_pass(); } @@ -1742,6 +1750,28 @@ int main(int argc, char *argv[]) } for (j = 0; j < n_mems; j++) { mem_type = &mem_types[j]; + + /* Initialize global test options */ + uffd_global_test_opts_t gopts = { 0 }; + + gopts.map_shared = mem_type->shared; + uffd_test_ops = mem_type->mem_ops; + uffd_test_case_ops = test->test_case_ops; + + if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) + gopts.page_size = default_huge_page_size(); + else + gopts.page_size = psize(); + + /* Ensure we have at least 2 pages */ + gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2) + / gopts.page_size; + + gopts.nr_parallel = 1; + + /* Initialize test arguments */ + args.mem_type = mem_type; + if (!(test->mem_targets & mem_type->mem_flag)) continue; @@ -1756,13 +1786,12 @@ int main(int argc, char *argv[]) uffd_test_skip("feature missing"); continue; } - if (uffd_setup_environment(&args, test, mem_type, - &errmsg)) { + if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) { uffd_test_skip(errmsg); continue; } - test->uffd_fn(&args); - uffd_test_ctx_clear(); + test->uffd_fn(&gopts, &args); + uffd_test_ctx_clear(&gopts); } } diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c index c2ba7d46c7b4..4e4a591cf527 100644 --- a/tools/testing/selftests/mm/uffd-wp-mremap.c +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -19,11 +19,6 @@ static size_t thpsizes[20]; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; -static int sz2ord(size_t size) -{ - return __builtin_ctzll(size / pagesize); -} - static int detect_thp_sizes(size_t sizes[], int max) { int count = 0; @@ -87,9 +82,9 @@ static void *alloc_one_folio(size_t size, bool private, bool hugetlb) struct thp_settings settings = *thp_current_settings(); if (private) - settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS; else - settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS; + settings.shmem_hugepages[sz2ord(size, pagesize)].enabled = SHMEM_ALWAYS; thp_push_settings(&settings); @@ -157,7 +152,8 @@ static bool range_is_swapped(void *addr, size_t size) return true; } -static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb) +static void test_one_folio(uffd_global_test_opts_t *gopts, size_t size, bool private, + bool swapout, bool hugetlb) { struct uffdio_writeprotect wp_prms; uint64_t features = 0; @@ -181,21 +177,21 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb } /* Register range for uffd-wp. */ - if (userfaultfd_open(&features)) { + if (userfaultfd_open(gopts, &features)) { if (errno == ENOENT) ksft_test_result_skip("userfaultfd not available\n"); else ksft_test_result_fail("userfaultfd_open() failed\n"); goto out; } - if (uffd_register(uffd, mem, size, false, true, false)) { + if (uffd_register(gopts->uffd, mem, size, false, true, false)) { ksft_test_result_fail("uffd_register() failed\n"); goto out; } wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP; wp_prms.range.start = (uintptr_t)mem; wp_prms.range.len = size; - if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { + if (ioctl(gopts->uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n"); goto out; } @@ -242,9 +238,9 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb out: if (mem) munmap(mem, size); - if (uffd >= 0) { - close(uffd); - uffd = -1; + if (gopts->uffd >= 0) { + close(gopts->uffd); + gopts->uffd = -1; } } @@ -336,6 +332,7 @@ static const struct testcase testcases[] = { int main(int argc, char **argv) { + uffd_global_test_opts_t gopts = { 0 }; struct thp_settings settings; int i, j, plan = 0; @@ -367,8 +364,8 @@ int main(int argc, char **argv) const struct testcase *tc = &testcases[i]; for (j = 0; j < *tc->nr_sizes; j++) - test_one_folio(tc->sizes[j], tc->private, tc->swapout, - tc->hugetlb); + test_one_folio(&gopts, tc->sizes[j], tc->private, + tc->swapout, tc->hugetlb); } /* If THP is supported, restore original THP settings. */ diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index 896b3f73fc53..306eba825107 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -230,10 +230,10 @@ void testcases_init(void) .msg = "mmap(-1, MAP_HUGETLB) again", }, { - .addr = (void *)(addr_switch_hint - pagesize), + .addr = (void *)(addr_switch_hint - hugepagesize), .size = 2 * hugepagesize, .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(addr_switch_hint - pagesize, 2*hugepagesize, MAP_HUGETLB)", + .msg = "mmap(addr_switch_hint - hugepagesize, 2*hugepagesize, MAP_HUGETLB)", .low_addr_required = 1, .keep_mapped = 1, }, diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 325de53966b6..a7d4b02b21dd 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -9,6 +9,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +orig_nr_hugepages=0 skip() { @@ -76,5 +77,41 @@ check_test_requirements() esac } +save_nr_hugepages() +{ + orig_nr_hugepages=$(cat /proc/sys/vm/nr_hugepages) +} + +restore_nr_hugepages() +{ + echo "$orig_nr_hugepages" > /proc/sys/vm/nr_hugepages +} + +setup_nr_hugepages() +{ + local needpgs=$1 + while read -r name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs="$size" + break + fi + done < /proc/meminfo + if [ "$freepgs" -ge "$needpgs" ]; then + return + fi + local hpgs=$((orig_nr_hugepages + needpgs)) + echo $hpgs > /proc/sys/vm/nr_hugepages + + local nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) + if [ "$nr_hugepgs" != "$hpgs" ]; then + restore_nr_hugepages + skip "$0: no enough hugepages for testing" + fi +} + check_test_requirements +save_nr_hugepages +# 4 keep_mapped pages, and one for tmp usage +setup_nr_hugepages 5 ./va_high_addr_switch --run-hugetlb +restore_nr_hugepages diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 169dbd692bf5..81b33d8f78f4 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -44,12 +44,18 @@ * On Arm64 the address space is 256TB and support for * high mappings up to 4PB virtual address space has * been added. + * + * On PowerPC64, the address space up to 128TB can be + * mapped without a hint. Addresses beyond 128TB, up to + * 4PB, can be mapped with a hint. + * */ #define NR_CHUNKS_128TB ((128 * SZ_1TB) / MAP_CHUNK_SIZE) /* Number of chunks for 128TB */ #define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL) #define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL) #define NR_CHUNKS_3840TB (NR_CHUNKS_128TB * 30UL) +#define NR_CHUNKS_3968TB (NR_CHUNKS_128TB * 31UL) #define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */ #define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */ @@ -59,6 +65,11 @@ #define HIGH_ADDR_SHIFT 49 #define NR_CHUNKS_LOW NR_CHUNKS_256TB #define NR_CHUNKS_HIGH NR_CHUNKS_3840TB +#elif defined(__PPC64__) +#define HIGH_ADDR_MARK ADDR_MARK_128TB +#define HIGH_ADDR_SHIFT 48 +#define NR_CHUNKS_LOW NR_CHUNKS_128TB +#define NR_CHUNKS_HIGH NR_CHUNKS_3968TB #else #define HIGH_ADDR_MARK ADDR_MARK_128TB #define HIGH_ADDR_SHIFT 48 @@ -227,7 +238,7 @@ int main(int argc, char *argv[]) if (hptr[i] == MAP_FAILED) break; - mark_range(ptr[i], MAP_CHUNK_SIZE); + mark_range(hptr[i], MAP_CHUNK_SIZE); validate_addr(hptr[i], 1); } hchunks = i; diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 9dafa7669ef9..e33cda301dad 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -338,6 +338,19 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max) return count; } +int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags) +{ + size_t count; + + count = pread(kpageflags_fd, flags, sizeof(*flags), + pfn * sizeof(*flags)); + + if (count != sizeof(*flags)) + return -1; + + return 0; +} + /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls) @@ -402,7 +415,7 @@ unsigned long get_free_hugepages(void) return fhp; } -bool check_vmflag_io(void *addr) +static bool check_vmflag(void *addr, const char *flag) { char buffer[MAX_LINE_LENGTH]; const char *flags; @@ -419,13 +432,40 @@ bool check_vmflag_io(void *addr) if (!flaglen) return false; - if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen)) + if (flaglen == strlen(flag) && !memcmp(flags, flag, flaglen)) return true; flags += flaglen; } } +bool check_vmflag_io(void *addr) +{ + return check_vmflag(addr, "io"); +} + +bool check_vmflag_pfnmap(void *addr) +{ + return check_vmflag(addr, "pf"); +} + +bool softdirty_supported(void) +{ + char *addr; + bool supported = false; + const size_t pagesize = getpagesize(); + + /* New mappings are expected to be marked with VM_SOFTDIRTY (sd). */ + addr = mmap(0, pagesize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (!addr) + ksft_exit_fail_msg("mmap failed\n"); + + supported = check_vmflag(addr, "sd"); + munmap(addr, pagesize); + return supported; +} + /* * Open an fd at /proc/$pid/maps and configure procmap_out ready for * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise. @@ -555,3 +595,126 @@ bool detect_huge_zeropage(void) close(fd); return enabled; } + +long ksm_get_self_zero_pages(void) +{ + int proc_self_ksm_stat_fd; + char buf[200]; + char *substr_ksm_zero; + size_t value_pos; + ssize_t read_size; + + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + if (proc_self_ksm_stat_fd < 0) + return -errno; + + read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); + close(proc_self_ksm_stat_fd); + if (read_size < 0) + return -errno; + + buf[read_size] = 0; + + substr_ksm_zero = strstr(buf, "ksm_zero_pages"); + if (!substr_ksm_zero) + return 0; + + value_pos = strcspn(substr_ksm_zero, "0123456789"); + return strtol(substr_ksm_zero + value_pos, NULL, 10); +} + +long ksm_get_self_merging_pages(void) +{ + int proc_self_ksm_merging_pages_fd; + char buf[10]; + ssize_t ret; + + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + if (proc_self_ksm_merging_pages_fd < 0) + return -errno; + + ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); + close(proc_self_ksm_merging_pages_fd); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +long ksm_get_full_scans(void) +{ + int ksm_full_scans_fd; + char buf[10]; + ssize_t ret; + + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + return -errno; + + ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); + close(ksm_full_scans_fd); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +int ksm_use_zero_pages(void) +{ + int ksm_use_zero_pages_fd; + ssize_t ret; + + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + if (ksm_use_zero_pages_fd < 0) + return -errno; + + ret = write(ksm_use_zero_pages_fd, "1", 1); + close(ksm_use_zero_pages_fd); + return ret == 1 ? 0 : -errno; +} + +int ksm_start(void) +{ + int ksm_fd; + ssize_t ret; + long start_scans, end_scans; + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + return -errno; + + /* Wait for two full scans such that any possible merging happened. */ + start_scans = ksm_get_full_scans(); + if (start_scans < 0) { + close(ksm_fd); + return start_scans; + } + ret = write(ksm_fd, "1", 1); + close(ksm_fd); + if (ret != 1) + return -errno; + do { + end_scans = ksm_get_full_scans(); + if (end_scans < 0) + return end_scans; + } while (end_scans < start_scans + 2); + + return 0; +} + +int ksm_stop(void) +{ + int ksm_fd; + ssize_t ret; + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + return -errno; + + ret = write(ksm_fd, "2", 1); + close(ksm_fd); + return ret == 1 ? 0 : -errno; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b55d1809debc..26c30fdc0241 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -18,6 +18,9 @@ #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) +#define KPF_COMPOUND_HEAD BIT_ULL(15) +#define KPF_COMPOUND_TAIL BIT_ULL(16) +#define KPF_THP BIT_ULL(22) /* * Ignore the checkpatch warning, we must read from x but don't want to do * anything with it in order to trigger a read page fault. We therefore must use @@ -85,6 +88,7 @@ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); int64_t allocate_transhuge(void *ptr, int pagemap_fd); unsigned long default_huge_page_size(void); int detect_hugetlb_page_sizes(size_t sizes[], int max); +int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags); int uffd_register(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor); @@ -93,12 +97,14 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); unsigned long get_free_hugepages(void); bool check_vmflag_io(void *addr); +bool check_vmflag_pfnmap(void *addr); int open_procmap(pid_t pid, struct procmap_fd *procmap_out); int query_procmap(struct procmap_fd *procmap); bool find_vma_procmap(struct procmap_fd *procmap, void *address); int close_procmap(struct procmap_fd *procmap); int write_sysfs(const char *file_path, unsigned long val); int read_sysfs(const char *file_path, unsigned long *val); +bool softdirty_supported(void); static inline int open_self_procmap(struct procmap_fd *procmap_out) { @@ -126,9 +132,21 @@ static inline void log_test_result(int result) ksft_test_result_report(result, "%s\n", test_name); } +static inline int sz2ord(size_t size, size_t pagesize) +{ + return __builtin_ctzll(size / pagesize); +} + void *sys_mremap(void *old_address, unsigned long old_size, unsigned long new_size, int flags, void *new_address); +long ksm_get_self_zero_pages(void); +long ksm_get_self_merging_pages(void); +long ksm_get_full_scans(void); +int ksm_use_zero_pages(void); +int ksm_start(void); +int ksm_stop(void); + /* * On ppc64 this will only work with radix 2M hugepage size */ diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..439101b518ee 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy @@ -51,6 +52,7 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tcp_port_share tfo timestamping tls diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 2b31d4a93ad7..5d9d96515c4a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -8,11 +8,12 @@ CFLAGS += -I../ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ rtnetlink.sh xfrm_policy.sh +TEST_PROGS += fcnal-ipv4.sh fcnal-ipv6.sh fcnal-other.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh -TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh +TEST_PROGS += tcp_fastopen_backup_key.sh l2tp.sh traceroute.sh TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh @@ -119,12 +120,18 @@ TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_ether_dst.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation +TEST_PROGS += route_hint.sh +TEST_GEN_PROGS += tcp_port_share # YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := busy_poller netlink-dumps +YNL_GEN_FILES := busy_poller +YNL_GEN_PROGS := netlink-dumps TEST_GEN_FILES += $(YNL_GEN_FILES) +TEST_GEN_PROGS += $(YNL_GEN_PROGS) TEST_FILES := settings +TEST_FILES += fcnal-test.sh TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index a4b61c6d0290..0a20c98bbcfd 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ -CFLAGS += $(KHDR_INCLUDES) +CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c index 9d22561e7b8f..fc467714387e 100644 --- a/tools/testing/selftests/net/af_unix/scm_inq.c +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -11,11 +11,6 @@ #define NR_CHUNKS 100 #define MSG_LEN 256 -struct scm_inq { - struct cmsghdr cmsghdr; - int inq; -}; - FIXTURE(scm_inq) { int fd[2]; @@ -70,35 +65,38 @@ static void send_chunks(struct __test_metadata *_metadata, static void recv_chunks(struct __test_metadata *_metadata, FIXTURE_DATA(scm_inq) *self) { + char cmsg_buf[CMSG_SPACE(sizeof(int))]; struct msghdr msg = {}; struct iovec iov = {}; - struct scm_inq cmsg; + struct cmsghdr *cmsg; char buf[MSG_LEN]; int i, ret; int inq; msg.msg_iov = &iov; msg.msg_iovlen = 1; - msg.msg_control = &cmsg; - msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); iov.iov_base = buf; iov.iov_len = sizeof(buf); for (i = 0; i < NR_CHUNKS; i++) { memset(buf, 0, sizeof(buf)); - memset(&cmsg, 0, sizeof(cmsg)); + memset(cmsg_buf, 0, sizeof(cmsg_buf)); ret = recvmsg(self->fd[1], &msg, 0); ASSERT_EQ(MSG_LEN, ret); - ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); - ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); - ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); - ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg->cmsg_type); ret = ioctl(self->fd[1], SIOCINQ, &inq); ASSERT_EQ(0, ret); - ASSERT_EQ(cmsg.inq, inq); + ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq); } } diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 37e034874034..ef2921988e5f 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -137,7 +137,6 @@ struct cmsg_data { static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) { struct cmsghdr *cmsg; - int data = 0; if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { log_err("recvmsg: truncated"); @@ -243,7 +242,6 @@ static int cmsg_check_dead(int fd, int expected_pid) int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - pid_t client_pid; struct pidfd_info info = { .mask = PIDFD_INFO_EXIT, }; diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 8b015f16c03d..914f99d153ce 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -271,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata, { #define MSG "x" #define MSGLEN 1 - struct { - struct cmsghdr cmsghdr; - int fd[2]; - } cmsg = { - .cmsghdr = { - .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), - .cmsg_level = SOL_SOCKET, - .cmsg_type = SCM_RIGHTS, - }, - .fd = { - self->fd[inflight * 2], - self->fd[inflight * 2], - }, + int fds[2] = { + self->fd[inflight * 2], + self->fd[inflight * 2], }; + char cmsg_buf[CMSG_SPACE(sizeof(fds))]; struct iovec iov = { .iov_base = MSG, .iov_len = MSGLEN, @@ -294,11 +285,18 @@ void __send_fd(struct __test_metadata *_metadata, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, - .msg_control = &cmsg, - .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), }; + struct cmsghdr *cmsg; int ret; + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); if (variant->disabled) { diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index b2c271b79240..c856d266c8f3 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: - fail(True, "%d BPF programs loaded, expected %d" % - (len(progs), expected)) + fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" % + (len(progs), expected, pp.pformat(progs))) return progs def bpftool_map_list(expected=None, ns=""): diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index a825e628aee7..ded9b925865e 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -491,7 +491,8 @@ int main(int argc, char *argv[]) if (err) { fprintf(stderr, "Can't resolve address [%s]:%s\n", opt.host, opt.service); - return ERN_SOCK_CREATE; + err = ERN_SOCK_CREATE; + goto err_free_buff; } if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP) @@ -500,8 +501,8 @@ int main(int argc, char *argv[]) fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto); if (fd < 0) { fprintf(stderr, "Can't open socket: %s\n", strerror(errno)); - freeaddrinfo(ai); - return ERN_RESOLVE; + err = ERN_RESOLVE; + goto err_free_info; } if (opt.sock.proto == IPPROTO_ICMP) { @@ -574,6 +575,9 @@ int main(int argc, char *argv[]) err_out: close(fd); +err_free_info: freeaddrinfo(ai); +err_free_buff: + free(buf); return err; } diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index c24417d0047b..d548611e2698 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -26,6 +26,7 @@ CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y +CONFIG_CRYPTO_SHA1=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NETFILTER=y diff --git a/tools/testing/selftests/net/fcnal-ipv4.sh b/tools/testing/selftests/net/fcnal-ipv4.sh new file mode 100755 index 000000000000..82f9c867c3e8 --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv4.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv4 diff --git a/tools/testing/selftests/net/fcnal-ipv6.sh b/tools/testing/selftests/net/fcnal-ipv6.sh new file mode 100755 index 000000000000..ab1fc7aa3caf --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv6.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv6 diff --git a/tools/testing/selftests/net/fcnal-other.sh b/tools/testing/selftests/net/fcnal-other.sh new file mode 100755 index 000000000000..a840cf80b32e --- /dev/null +++ b/tools/testing/selftests/net/fcnal-other.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t other diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 4fcc38907e48..844a580ae74e 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -189,7 +189,7 @@ show_hint() kill_procs() { killall nettest ping ping6 >/dev/null 2>&1 - sleep 1 + slowwait 2 sh -c 'test -z "$(pgrep '"'^(nettest|ping|ping6)$'"')"' } set_ping_group() @@ -424,6 +424,8 @@ create_ns() ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.accept_dad=0 } # create veth pair to connect namespaces and apply addresses. @@ -875,7 +877,7 @@ ipv4_tcp_md5_novrf() # basic use case log_start run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -883,7 +885,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -891,7 +893,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -899,7 +901,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -910,7 +912,7 @@ ipv4_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -918,7 +920,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -926,7 +928,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -943,7 +945,7 @@ ipv4_tcp_md5() # basic use case log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -951,7 +953,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -959,7 +961,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -967,7 +969,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -978,7 +980,7 @@ ipv4_tcp_md5() # client in prefix log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -986,7 +988,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -994,7 +996,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -1005,14 +1007,14 @@ ipv4_tcp_md5() log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -1020,7 +1022,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1028,21 +1030,21 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -1050,7 +1052,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1058,7 +1060,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -1082,14 +1084,14 @@ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() log_start show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" log_start show_hint "Binding both the socket and the key is not required but it works" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" } @@ -1103,25 +1105,25 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" @@ -1193,7 +1195,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -1201,7 +1203,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1221,13 +1223,13 @@ ipv4_tcp_novrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1249,7 +1251,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1257,7 +1259,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1266,7 +1268,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1274,7 +1276,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -1283,7 +1285,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -1291,7 +1293,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1323,19 +1325,19 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1352,7 +1354,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -1374,14 +1376,14 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -1396,7 +1398,7 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to device" run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1406,7 +1408,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since client is not bound to VRF" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -1418,13 +1420,13 @@ ipv4_tcp_vrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1443,7 +1445,7 @@ ipv4_tcp_vrf() do log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -1451,26 +1453,26 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" log_start show_hint "Should fail 'No route to host' since client is out of VRF scope" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" } @@ -1509,7 +1511,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -1522,7 +1524,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1533,31 +1535,31 @@ ipv4_udp_novrf() do log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()" @@ -1580,7 +1582,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1588,7 +1590,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1597,7 +1599,7 @@ ipv4_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1605,25 +1607,25 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1636,28 +1638,28 @@ ipv4_udp_novrf() log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 2 "Global server, device client, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1667,7 +1669,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -1709,19 +1711,19 @@ ipv4_udp_vrf() log_start show_hint "Fails because ingress is in a VRF and global server is disabled" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1733,7 +1735,7 @@ ipv4_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is out of scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local connection" done @@ -1741,26 +1743,26 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1775,19 +1777,19 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1802,13 +1804,13 @@ ipv4_udp_vrf() # log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "VRF client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "Enslaved device client" @@ -1829,31 +1831,31 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1861,7 +1863,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" done @@ -1870,7 +1872,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -s -D -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -2093,7 +2095,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2107,7 +2109,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2120,7 +2122,7 @@ ipv4_rt() a=${NSA_IP} log_start run_cmd nettest ${varg} -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2134,7 +2136,7 @@ ipv4_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2145,7 +2147,7 @@ ipv4_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2161,7 +2163,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2175,7 +2177,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2189,7 +2191,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2200,7 +2202,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2211,7 +2213,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2561,7 +2563,7 @@ ipv6_tcp_md5_novrf() # basic use case log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -2569,7 +2571,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -2577,7 +2579,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -2585,7 +2587,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -2596,7 +2598,7 @@ ipv6_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -2604,7 +2606,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -2612,7 +2614,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2629,7 +2631,7 @@ ipv6_tcp_md5() # basic use case log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -2637,7 +2639,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -2645,7 +2647,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -2653,7 +2655,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -2664,7 +2666,7 @@ ipv6_tcp_md5() # client in prefix log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -2672,7 +2674,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -2680,7 +2682,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -2691,14 +2693,14 @@ ipv6_tcp_md5() log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -2706,7 +2708,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2714,21 +2716,21 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -2736,7 +2738,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2744,7 +2746,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -2772,7 +2774,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2793,7 +2795,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Client" done @@ -2802,7 +2804,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -2822,7 +2824,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -2830,7 +2832,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2839,7 +2841,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -2847,7 +2849,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -2856,7 +2858,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -2865,7 +2867,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" done @@ -2898,7 +2900,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -2907,7 +2909,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2916,7 +2918,7 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2924,7 +2926,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -2943,7 +2945,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -2964,7 +2966,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2973,7 +2975,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2982,13 +2984,13 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2996,7 +2998,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3016,7 +3018,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails 'Connection refused' since client is not in VRF" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -3029,7 +3031,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" done @@ -3038,7 +3040,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails since VRF device does not allow linklocal addresses" run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 1 "Client, VRF bind" @@ -3046,7 +3048,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -3071,7 +3073,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -3079,7 +3081,7 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" @@ -3087,13 +3089,13 @@ ipv6_tcp_vrf() log_start show_hint "Should fail since unbound client is out of VRF scope" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" @@ -3101,7 +3103,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" done @@ -3141,13 +3143,13 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3155,7 +3157,7 @@ ipv6_udp_novrf() a=${NSA_LO_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -3165,7 +3167,7 @@ ipv6_udp_novrf() #log_start #show_hint "Should fail since loopback address is out of scope" #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - #sleep 1 + wait_local_port_listen ${NSA} 12345 udp #run_cmd_nsb nettest -6 -D -r ${a} #log_test_addr ${a} $? 1 "Device server" @@ -3185,25 +3187,25 @@ ipv6_udp_novrf() do log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF" @@ -3225,7 +3227,7 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -3233,7 +3235,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -3242,7 +3244,7 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -6 -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Device server, local connection" done @@ -3250,19 +3252,19 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection" @@ -3271,28 +3273,28 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" done @@ -3300,7 +3302,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3314,7 +3316,7 @@ ipv6_udp_novrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3338,7 +3340,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -3347,7 +3349,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3356,7 +3358,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3378,7 +3380,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local conn" done @@ -3387,7 +3389,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -3396,25 +3398,25 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 1 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -3429,7 +3431,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -3438,7 +3440,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3447,7 +3449,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3465,7 +3467,7 @@ ipv6_udp_vrf() # log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6} log_test $? 0 "VRF client" @@ -3476,7 +3478,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6} log_test $? 0 "Enslaved device client" @@ -3491,13 +3493,13 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" #log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3505,13 +3507,13 @@ ipv6_udp_vrf() a=${VRF_IP6} log_start run_cmd nettest -6 -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3527,25 +3529,25 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3557,7 +3559,7 @@ ipv6_udp_vrf() # link local addresses log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Global server, linklocal IP" @@ -3568,7 +3570,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6} log_test $? 0 "Enslaved device client, linklocal IP" @@ -3579,7 +3581,7 @@ ipv6_udp_vrf() log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Enslaved device client, local conn - linklocal IP" @@ -3592,7 +3594,7 @@ ipv6_udp_vrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3771,7 +3773,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3785,7 +3787,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3799,7 +3801,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3814,7 +3816,7 @@ ipv6_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3825,7 +3827,7 @@ ipv6_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3842,7 +3844,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3856,7 +3858,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3869,7 +3871,7 @@ ipv6_rt() a=${NSA_IP6} log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3880,7 +3882,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3891,7 +3893,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3950,7 +3952,7 @@ netfilter_tcp_reset() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -3968,7 +3970,7 @@ netfilter_icmp() do log_start run_cmd nettest ${arg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4007,7 +4009,7 @@ netfilter_tcp6_reset() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -4025,7 +4027,7 @@ netfilter_icmp6() do log_start run_cmd nettest -6 -s ${arg} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4221,12 +4223,12 @@ use_case_snat_on_vrf() run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port} log_test $? 0 "IPv4 TCP connection over VRF with SNAT" run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port} log_test $? 0 "IPv6 TCP connection over VRF with SNAT" @@ -4272,6 +4274,7 @@ EOF TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" +# note: each TEST_ group needs a dedicated runner, e.g. fcnal-ipv4.sh PAUSE_ON_FAIL=no PAUSE=no @@ -4302,6 +4305,8 @@ elif [ "$TESTS" = "ipv4" ]; then TESTS="$TESTS_IPV4" elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" +elif [ "$TESTS" = "other" ]; then + TESTS="$TESTS_OTHER" fi check_gen_prog "nettest" diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh index c159230c9b62..0b8a2465dd04 100755 --- a/tools/testing/selftests/net/fdb_notify.sh +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -40,16 +40,16 @@ do_test_dup() test_dup_bridge() { - ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add br up type bridge vlan_filtering 1 do_test_dup add "bridge" dev br self do_test_dup del "bridge" dev br self } test_dup_vxlan_self() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_set_master vx br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br do_test_dup add "vxlan" dev vx self dst 192.0.2.1 do_test_dup del "vxlan" dev vx self dst 192.0.2.1 @@ -57,9 +57,9 @@ test_dup_vxlan_self() test_dup_vxlan_master() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_set_master vx br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br do_test_dup add "vxlan master" dev vx master do_test_dup del "vxlan master" dev vx master @@ -67,8 +67,8 @@ test_dup_vxlan_master() test_dup_macvlan_self() { - ip_link_add dd up type dummy - ip_link_add mv up link dd type macvlan mode passthru + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru do_test_dup add "macvlan self" dev mv self do_test_dup del "macvlan self" dev mv self @@ -76,10 +76,10 @@ test_dup_macvlan_self() test_dup_macvlan_master() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add dd up type dummy - ip_link_add mv up link dd type macvlan mode passthru - ip_link_set_master mv br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru + adf_ip_link_set_master mv br do_test_dup add "macvlan master" dev mv self do_test_dup del "macvlan master" dev mv self diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index d7bb2e80e88c..e6f482a600da 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,8 +1,11 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_fdb_learning_limit.sh \ +TEST_PROGS = \ + bridge_activity_notify.sh \ + bridge_fdb_learning_limit.sh \ bridge_igmp.sh \ bridge_locked_port.sh \ + bridge_fdb_local_vlan_0.sh \ bridge_mdb.sh \ bridge_mdb_host.sh \ bridge_mdb_max.sh \ diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index 7b41cff993ad..392a5a91ed37 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -57,6 +57,21 @@ o Code shall be checked using ShellCheck [1] prior to submission. 1. https://www.shellcheck.net/ +Cleanups +-------- + +o lib.sh brings in defer.sh (by way of ../lib.sh) by default. Consider + making use of the defer primitive to schedule automatic cleanups. This + makes it harder to forget to remove a temporary netdevice, kill a running + process or perform other cleanup when the test script is interrupted. + +o When adding a helper that dirties the environment, but schedules all + necessary cleanups through defer, consider prefixing it adf_ for + consistency with lib.sh and ../lib.sh helpers. This serves as an + immediately visible bit of documentation about the helper API. + +o Definitely do the above for any new code in lib.sh, if practical. + Customization ============= diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh new file mode 100755 index 000000000000..522a5b1b046c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | 192.0.2.1/28 | | 192.0.2.2/28 | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + new_inactive_test + existing_active_test + norefresh_test +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + adf_simple_if_init "$h1" 192.0.2.1/28 +} + +h2_create() +{ + adf_simple_if_init "$h2" 192.0.2.2/28 +} + +switch_create() +{ + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \ + ageing_time "$LOW_AGEING_TIME" + adf_ip_link_set_up br1 + + adf_ip_link_set_master "$swp1" br1 + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_master "$swp2" br1 + adf_ip_link_set_up "$swp2" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + adf_vrf_prepare + + h1_create + h2_create + switch_create +} + +fdb_active_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q -v "inactive" +} + +fdb_inactive_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" +} + +new_inactive_test() +{ + local mac="00:11:22:33:44:55" + + # Add a new FDB entry as static and inactive and check that it + # becomes active upon traffic. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_err $? "FDB entry not present as \"inactive\" when should" + + $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q + + busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac" + check_err $? "FDB entry present as \"inactive\" when should not" + + log_test "Transition from inactive to active" + + bridge fdb del "$mac" dev "$swp1" master +} + +existing_active_test() +{ + local mac="00:11:22:33:44:55" + local ageing_time + + # Enable activity notifications on an existing dynamic FDB entry and + # check that it becomes inactive after the ageing time passed. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master dynamic + bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh + + bridge -d fdb get "$mac" br br1 | grep -q "activity_notify" + check_err $? "FDB entry not present as \"activity_notify\" when should" + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_fail $? "FDB entry present as \"inactive\" when should not" + + ageing_time=$(bridge_ageing_time_get br1) + slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac" + check_err $? "FDB entry not present as \"inactive\" when should" + + log_test "Transition from active to inactive" + + bridge fdb del "$mac" dev "$swp1" master +} + +norefresh_test() +{ + local mac="00:11:22:33:44:55" + local updated_time + + # Check that the "updated" time is reset when replacing an FDB entry + # without the "norefresh" keyword and that it is not reset when + # replacing with the "norefresh" keyword. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static + sleep 1 + + bridge fdb replace "$mac" dev "$swp1" master static activity_notify + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -ne 0 ]]; then + check_err 1 "\"updated\" time was not reset when should" + fi + + sleep 1 + bridge fdb replace "$mac" dev "$swp1" master static norefresh + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -eq 0 ]]; then + check_err 1 "\"updated\" time was reset when should not" + fi + + log_test "Resetting of \"updated\" time" + + bridge fdb del "$mac" dev "$swp1" master +} + +if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then + echo "SKIP: iproute2 too old, missing bridge FDB activity notification control" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh new file mode 100755 index 000000000000..694de8ba97e4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh @@ -0,0 +1,387 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +-----------------------+ +-----------------------+ +# | H1 (vrf) | | H2 (vrf) | | H3 (vrf) | +# | + $h1 | | + $h2 | | + $h3 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | | | 192.0.2.18/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | +# | | | | | | | | | +# +----|------------------+ +----|------------------+ +----|------------------+ +# | | | +# +----|-------------------------|-------------------------|------------------+ +# | +--|-------------------------|------------------+ | | +# | | + $swp1 + $swp2 | + $swp3 | +# | | | 192.0.2.17/28 | +# | | BR1 (802.1q) | 2001:db8:2::1/64 | +# | | 192.0.2.3/28 | | +# | | 2001:db8:1::3/64 | | +# | +-----------------------------------------------+ SW | +# +---------------------------------------------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. +#shellcheck disable=SC2034 # ... and global variables + +ALL_TESTS=" + test_d_no_sharing + test_d_sharing + test_q_no_sharing + test_q_sharing + test_addr_set +" + +NUM_NETIFS=6 +source lib.sh + +pMAC=00:11:22:33:44:55 +bMAC=00:11:22:33:44:66 +mMAC=00:11:22:33:44:77 +xMAC=00:11:22:33:44:88 + +host_create() +{ + local h=$1; shift + local ipv4=$1; shift + local ipv6=$1; shift + + adf_simple_if_init "$h" "$ipv4" "$ipv6" + adf_ip_route_add vrf "v$h" 192.0.2.16/28 nexthop via 192.0.2.3 + adf_ip_route_add vrf "v$h" 2001:db8:2::/64 nexthop via 2001:db8:1::3 +} + +h3_create() +{ + adf_simple_if_init "$h3" 192.0.2.18/28 2001:db8:2::2/64 + adf_ip_route_add vrf "v$h3" 192.0.2.0/28 nexthop via 192.0.2.17 + adf_ip_route_add vrf "v$h3" 2001:db8:1::/64 nexthop via 2001:db8:2::1 + + tc qdisc add dev "$h3" clsact + defer tc qdisc del dev "$h3" clsact + + tc filter add dev "$h3" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h3" ingress proto ip pref 104 + + tc qdisc add dev "$h2" clsact + defer tc qdisc del dev "$h2" clsact + + tc filter add dev "$h2" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h2" ingress proto ip pref 104 +} + +switch_create() +{ + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_up "$swp2" + + adf_ip_addr_add "$swp3" 192.0.2.17/28 + adf_ip_addr_add "$swp3" 2001:db8:2::1/64 + adf_ip_link_set_up "$swp3" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + adf_vrf_prepare + adf_forwarding_enable + + host_create "$h1" 192.0.2.1/28 2001:db8:1::1/64 + host_create "$h2" 192.0.2.2/28 2001:db8:1::2/64 + h3_create + + switch_create +} + +adf_bridge_configure() +{ + local dev + + adf_ip_addr_add br 192.0.2.3/28 + adf_ip_addr_add br 2001:db8:1::3/64 + + adf_bridge_vlan_add dev br vid 1 pvid untagged self + adf_bridge_vlan_add dev br vid 2 self + adf_bridge_vlan_add dev br vid 3 self + + for dev in "$swp1" "$swp2"; do + adf_ip_link_set_master "$dev" br + adf_bridge_vlan_add dev "$dev" vid 1 pvid untagged + adf_bridge_vlan_add dev "$dev" vid 2 + adf_bridge_vlan_add dev "$dev" vid 3 + done +} + +adf_bridge_create() +{ + local mac + + adf_ip_link_add br up type bridge vlan_default_pvid 0 "$@" + mac=$(mac_get br) + adf_bridge_configure + adf_ip_link_set_addr br "$mac" +} + +check_fdb_local_vlan_0_support() +{ + if adf_ip_link_add XXbr up type bridge vlan_filtering 1 \ + fdb_local_vlan_0 1 &>/dev/null; then + return 0 + fi + + log_test_skip "FDB sharing" \ + "iproute 2 or the kernel do not support fdb_local_vlan_0" +} + +check_mac_presence() +{ + local should_fail=$1; shift + local dev=$1; shift + local vlan=$1; shift + local mac + + mac=$(mac_get "$dev") + + if ((vlan == 0)); then + vlan=null + fi + + bridge -j fdb show dev "$dev" | + jq -e --arg mac "$mac" --argjson vlan "$vlan" \ + '.[] | select(.mac == $mac) | select(.vlan == $vlan)' > /dev/null + check_err_fail "$should_fail" $? "FDB dev $dev vid $vlan addr $mac exists" +} + +do_sharing_test() +{ + local should_fail=$1; shift + local what=$1; shift + local dev + + RET=0 + + for dev in "$swp1" "$swp2" br; do + check_mac_presence 0 "$dev" 0 + check_mac_presence "$should_fail" "$dev" 1 + check_mac_presence "$should_fail" "$dev" 2 + check_mac_presence "$should_fail" "$dev" 3 + done + + log_test "$what" +} + +do_end_to_end_test() +{ + local mac=$1; shift + local what=$1; shift + local probe_dev=${1-$h3}; shift + local expect=${1-10}; shift + + local t0 + local t1 + local dd + + RET=0 + + # In mausezahn, use $dev MAC as the destination MAC. In the MAC sharing + # context, that will cause an FDB miss on VLAN 1 and prompt a second + # lookup in VLAN 0. + + t0=$(tc_rule_stats_get "$probe_dev" 104 ingress) + + $MZ "$h1" -c 10 -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.18 -t udp "dp=4096,sp=2048" -q + sleep 1 + + t1=$(tc_rule_stats_get "$probe_dev" 104 ingress) + dd=$((t1 - t0)) + + ((dd == expect)) + check_err $? "Expected $expect packets on $probe_dev got $dd" + + log_test "$what" +} + +do_tests() +{ + local should_fail=$1; shift + local what=$1; shift + local swp1_mac + local br_mac + + swp1_mac=$(mac_get "$swp1") + br_mac=$(mac_get br) + + do_sharing_test "$should_fail" "$what" + do_end_to_end_test "$swp1_mac" "$what: end to end, $swp1 MAC" + do_end_to_end_test "$br_mac" "$what: end to end, br MAC" +} + +bridge_standard() +{ + local vlan_filtering=$1; shift + + if ((vlan_filtering)); then + echo 802.1q + else + echo 802.1d + fi +} + +nonexistent_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + # We expect flooding, so $h2 should get the traffic. + do_end_to_end_test "$xMAC" "$standard: Nonexistent FDB" "$h2" +} + +misleading_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + defer_scope_push + # Add an FDB entry on VLAN 0. The lookup on VLAN-aware bridge + # shouldn't pick this up even with fdb_local_vlan_0 enabled, so + # the traffic should be flooded. This all holds on + # vlan_filtering bridge, on non-vlan_filtering one the FDB entry + # is expected to be found as usual, no flooding takes place. + # + # Adding only on VLAN 0 is a bit tricky, because bridge is + # trying to be nice and interprets the request as if the FDB + # should be added on each VLAN. + + bridge fdb add "$mMAC" dev "$swp1" master + bridge fdb del "$mMAC" dev "$swp1" vlan 1 master + bridge fdb del "$mMAC" dev "$swp1" vlan 2 master + bridge fdb del "$mMAC" dev "$swp1" vlan 3 master + + local expect=$((vlan_filtering ? 10 : 0)) + do_end_to_end_test "$mMAC" \ + "$standard: Lookup of non-local MAC on VLAN 0" \ + "$h2" "$expect" + defer_scope_pop +} + +change_mac() +{ + local dev=$1; shift + local mac=$1; shift + local cur_mac + + cur_mac=$(mac_get "$dev") + + log_info "Change $dev MAC $cur_mac -> $mac" + adf_ip_link_set_addr "$dev" "$mac" + defer log_info "Change $dev MAC back" +} + +do_test_no_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + adf_bridge_create vlan_filtering "$vlan_filtering" + setup_wait + + do_tests 0 "$standard, no FDB sharing" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 0 "$standard, no FDB sharing after MAC change" + + in_defer_scope check_fdb_local_vlan_0_support || return + + log_info "Set fdb_local_vlan_0=1" + ip link set dev br type bridge fdb_local_vlan_0 1 + + do_tests 1 "$standard, fdb sharing after toggle" +} + +do_test_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + in_defer_scope check_fdb_local_vlan_0_support || return + + adf_bridge_create vlan_filtering "$vlan_filtering" fdb_local_vlan_0 1 + setup_wait + + do_tests 1 "$standard, FDB sharing" + + nonexistent_fdb_test "$vlan_filtering" + misleading_fdb_test "$vlan_filtering" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 1 "$standard, FDB sharing after MAC change" + + log_info "Set fdb_local_vlan_0=0" + ip link set dev br type bridge fdb_local_vlan_0 0 + + do_tests 0 "$standard, No FDB sharing after toggle" +} + +test_d_no_sharing() +{ + do_test_no_sharing 0 +} + +test_d_sharing() +{ + do_test_sharing 0 +} + +test_q_no_sharing() +{ + do_test_no_sharing 1 +} + +test_q_sharing() +{ + do_test_sharing 1 +} + +adf_addr_set_bridge_create() +{ + adf_ip_link_add br up type bridge vlan_filtering 0 + adf_ip_link_set_addr br "$(mac_get br)" + adf_bridge_configure +} + +test_addr_set() +{ + adf_addr_set_bridge_create + setup_wait + + do_end_to_end_test "$(mac_get br)" "NET_ADDR_SET: end to end, br MAC" +} + +trap cleanup EXIT + +setup_prepare +tests_run diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 7d531f7091e6..5dbfab0e23e3 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -226,7 +226,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:4::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index dda11a4a9450..b4f17a5bbc61 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -321,7 +321,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh index 49fa94b53a1c..25036e38043c 100755 --- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -95,7 +95,7 @@ ipv6_in_too_big_err() # Send too big packets ip vrf exec $vrf_name \ - $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -s 1300 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) test "$((t1 - t0))" -ne 0 @@ -131,7 +131,7 @@ ipv6_in_addr_err() # Disable forwarding temporary while sending the packet sysctl -qw net.ipv6.conf.all.forwarding=0 ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null sysctl -qw net.ipv6.conf.all.forwarding=1 local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) @@ -150,7 +150,7 @@ ipv6_in_discard() # Add a policy to discard ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null ip xfrm policy del dst 2001:1:2::2/128 dir fwd local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index e28b4a079e52..b24acfa52a3a 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -323,7 +323,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 890b3374dacd..a9034f0bb58b 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -571,30 +571,6 @@ wait_for_dev() fi } -cmd_jq() -{ - local cmd=$1 - local jq_exp=$2 - local jq_opts=$3 - local ret - local output - - output="$($cmd)" - # it the command fails, return error right away - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - output=$(echo $output | jq -r $jq_opts "$jq_exp") - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output - # return success only in case of non-empty output - [ ! -z "$output" ] -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then @@ -623,6 +599,12 @@ vrf_cleanup() ip -4 rule del pref 32765 } +adf_vrf_prepare() +{ + vrf_prepare + defer vrf_cleanup +} + __last_tb_id=0 declare -A __TB_IDS @@ -735,6 +717,12 @@ simple_if_fini() vrf_destroy $vrf_name } +adf_simple_if_init() +{ + simple_if_init "$@" + defer simple_if_fini "$@" +} + tunnel_create() { local name=$1; shift @@ -1035,6 +1023,12 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +adf_forwarding_enable() +{ + forwarding_enable + defer forwarding_restore +} + declare -A MTU_ORIG mtu_set() { @@ -1291,8 +1285,8 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping_test() @@ -1322,8 +1316,8 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING6 $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping6_test() diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index a20d22d1df36..8d4ae6c952a1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -238,7 +238,7 @@ test_lag_slave() ip neigh flush dev br1 setup_wait_dev $up_dev setup_wait_dev $host_dev - $ARPING -I br1 192.0.2.130 -qfc 1 + $ARPING -I br1 -qfc 1 192.0.2.130 sleep 2 mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index 1b902cc579f6..a21c771908b3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -196,7 +196,7 @@ test_span_gre_forbidden_egress() bridge vlan add dev $swp3 vid 555 # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev @@ -290,7 +290,7 @@ test_span_gre_fdb_roaming() bridge fdb del dev $swp2 $h3mac vlan 555 master 2>/dev/null # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh index 8f9922c695b0..0453210271dc 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -165,8 +165,7 @@ h1_create() { local i; - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 mtu_set $h1 9900 defer mtu_restore $h1 @@ -182,8 +181,7 @@ h2_create() { local i - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 9900 defer mtu_restore $h2 @@ -251,8 +249,7 @@ setup_prepare() put=$swp2 hut=$h2 - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index af166662b78a..f2a3d9254642 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -52,8 +52,7 @@ PKTSZ=1400 h1_create() { - simple_if_init $h1 192.0.2.1/28 - defer simple_if_fini $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 mtu_set $h1 10000 defer mtu_restore $h1 @@ -65,8 +64,7 @@ h1_create() h2_create() { - simple_if_init $h2 192.0.2.2/28 - defer simple_if_fini $h2 192.0.2.2/28 + adf_simple_if_init $h2 192.0.2.2/28 mtu_set $h2 10000 defer mtu_restore $h2 @@ -74,8 +72,7 @@ h2_create() h3_create() { - simple_if_init $h3 192.0.2.3/28 - defer simple_if_fini $h3 192.0.2.3/28 + adf_simple_if_init $h3 192.0.2.3/28 mtu_set $h3 10000 defer mtu_restore $h3 @@ -125,8 +122,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index ec309a5086bc..070c17faa9e4 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -59,8 +59,7 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev - defer simple_if_fini $dev + adf_simple_if_init $dev mtu_set $dev 10000 defer mtu_restore $dev @@ -149,8 +148,7 @@ setup_prepare() h2_mac=$(mac_get $h2) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh index 462db0b603e7..6a570d256e07 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -119,16 +119,15 @@ source lib.sh h1_create() { - simple_if_init "$h1" - defer simple_if_fini "$h1" + adf_simple_if_init "$h1" - ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 - ip_link_set_up "$h1.10" - ip_addr_add "$h1.10" 192.0.2.1/28 + adf_ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + adf_ip_link_set_up "$h1.10" + adf_ip_addr_add "$h1.10" 192.0.2.1/28 - ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 - ip_link_set_up "$h1.20" - ip_addr_add "$h1.20" 2001:db8:1::1/64 + adf_ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + adf_ip_link_set_up "$h1.20" + adf_ip_addr_add "$h1.20" 2001:db8:1::1/64 } install_capture() @@ -152,51 +151,51 @@ install_capture() h2_create() { # $h2 - ip_link_set_up "$h2" + adf_ip_link_set_up "$h2" # H2 vrf_create "v$h2" defer vrf_destroy "v$h2" - ip_link_set_up "v$h2" + adf_ip_link_set_up "v$h2" # br2 - ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 - ip_link_set_master br2 "v$h2" - ip_link_set_up br2 + adf_ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br2 "v$h2" + adf_ip_link_set_up br2 # $h2 - ip_link_set_master "$h2" br2 + adf_ip_link_set_master "$h2" br2 install_capture "$h2" # v1$h2 - ip_link_set_up "v1$h2" - ip_link_set_master "v1$h2" br2 + adf_ip_link_set_up "v1$h2" + adf_ip_link_set_master "v1$h2" br2 } h3_create() { # $h3 - ip_link_set_up "$h3" + adf_ip_link_set_up "$h3" # H3 vrf_create "v$h3" defer vrf_destroy "v$h3" - ip_link_set_up "v$h3" + adf_ip_link_set_up "v$h3" # br3 - ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 - ip_link_set_master br3 "v$h3" - ip_link_set_up br3 + adf_ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br3 "v$h3" + adf_ip_link_set_up br3 # $h3 - ip_link_set_master "$h3" br3 + adf_ip_link_set_master "$h3" br3 install_capture "$h3" # v1$h3 - ip_link_set_up "v1$h3" - ip_link_set_master "v1$h3" br3 + adf_ip_link_set_up "v1$h3" + adf_ip_link_set_master "v1$h3" br3 } switch_create() @@ -205,35 +204,35 @@ switch_create() # br1 swp1_mac=$(mac_get "$swp1") - ip_link_add br1 type bridge vlan_filtering 1 \ + adf_ip_link_add br1 type bridge vlan_filtering 1 \ vlan_default_pvid 0 mcast_snooping 0 - ip_link_set_addr br1 "$swp1_mac" - ip_link_set_up br1 + adf_ip_link_set_addr br1 "$swp1_mac" + adf_ip_link_set_up br1 # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. - ip_link_add "X$IPMR" up type dummy + adf_ip_link_add "X$IPMR" up type dummy # IPMR - ip_link_add "$IPMR" up type dummy - ip_addr_add "$IPMR" 192.0.2.100/28 - ip_addr_add "$IPMR" 2001:db8:4::1/64 + adf_ip_link_add "$IPMR" up type dummy + adf_ip_addr_add "$IPMR" 192.0.2.100/28 + adf_ip_addr_add "$IPMR" 2001:db8:4::1/64 # $swp1 - ip_link_set_up "$swp1" - ip_link_set_master "$swp1" br1 - bridge_vlan_add vid 10 dev "$swp1" - bridge_vlan_add vid 20 dev "$swp1" + adf_ip_link_set_up "$swp1" + adf_ip_link_set_master "$swp1" br1 + adf_bridge_vlan_add vid 10 dev "$swp1" + adf_bridge_vlan_add vid 20 dev "$swp1" # $swp2 - ip_link_set_up "$swp2" - ip_addr_add "$swp2" 192.0.2.33/28 - ip_addr_add "$swp2" 2001:db8:2::1/64 + adf_ip_link_set_up "$swp2" + adf_ip_addr_add "$swp2" 192.0.2.33/28 + adf_ip_addr_add "$swp2" 2001:db8:2::1/64 # $swp3 - ip_link_set_up "$swp3" - ip_addr_add "$swp3" 192.0.2.65/28 - ip_addr_add "$swp3" 2001:db8:3::1/64 + adf_ip_link_set_up "$swp3" + adf_ip_addr_add "$swp3" 192.0.2.65/28 + adf_ip_addr_add "$swp3" 2001:db8:3::1/64 } vx_create() @@ -241,11 +240,11 @@ vx_create() local name=$1; shift local vid=$1; shift - ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + adf_ip_link_add "$name" up type vxlan dstport "$VXPORT" \ nolearning noudpcsum tos inherit ttl 16 \ "$@" - ip_link_set_master "$name" br1 - bridge_vlan_add vid "$vid" dev "$name" pvid untagged + adf_ip_link_set_master "$name" br1 + adf_bridge_vlan_add vid "$vid" dev "$name" pvid untagged } export -f vx_create @@ -290,39 +289,38 @@ ns_init_common() local ipv6_host=$1; shift # v2$h2 / v2$h3 - ip_link_set_up "$if_in" - ip_addr_add "$if_in" "$ipv4_in" - ip_addr_add "$if_in" "$ipv6_in" + adf_ip_link_set_up "$if_in" + adf_ip_addr_add "$if_in" "$ipv4_in" + adf_ip_addr_add "$if_in" "$ipv6_in" # br1 - ip_link_add br1 type bridge vlan_filtering 1 \ + adf_ip_link_add br1 type bridge vlan_filtering 1 \ vlan_default_pvid 0 mcast_snooping 0 - ip_link_set_up br1 + adf_ip_link_set_up br1 # vx10, vx20 vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" # w1 - ip_link_add w1 type veth peer name w2 - ip_link_set_master w1 br1 - ip_link_set_up w1 - bridge_vlan_add vid 10 dev w1 - bridge_vlan_add vid 20 dev w1 + adf_ip_link_add w1 type veth peer name w2 + adf_ip_link_set_master w1 br1 + adf_ip_link_set_up w1 + adf_bridge_vlan_add vid 10 dev w1 + adf_bridge_vlan_add vid 20 dev w1 # w2 - simple_if_init w2 - defer simple_if_fini w2 + adf_simple_if_init w2 # w2.10 - ip_link_add w2.10 master vw2 link w2 type vlan id 10 - ip_link_set_up w2.10 - ip_addr_add w2.10 "$ipv4_host" + adf_ip_link_add w2.10 master vw2 link w2 type vlan id 10 + adf_ip_link_set_up w2.10 + adf_ip_addr_add w2.10 "$ipv4_host" # w2.20 - ip_link_add w2.20 master vw2 link w2 type vlan id 20 - ip_link_set_up w2.20 - ip_addr_add w2.20 "$ipv6_host" + adf_ip_link_add w2.20 master vw2 link w2 type vlan id 20 + adf_ip_link_set_up w2.20 + adf_ip_addr_add w2.20 "$ipv6_host" } export -f ns_init_common @@ -371,14 +369,11 @@ setup_prepare() swp3=${NETIFS[p5]} h3=${NETIFS[p6]} - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare + adf_forwarding_enable - forwarding_enable - defer forwarding_restore - - ip_link_add "v1$h2" type veth peer name "v2$h2" - ip_link_add "v1$h3" type veth peer name "v2$h3" + adf_ip_link_add "v1$h2" type veth peer name "v2$h2" + adf_ip_link_add "v1$h3" type veth peer name "v2$h3" h1_create h2_create @@ -720,7 +715,7 @@ ipv4_mcroute_fdb_oif0_sep() { adf_install_sg_sep - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" @@ -731,7 +726,7 @@ ipv4_mcroute_fdb_oif0_sep_rx() { adf_install_sg_sep_rx lo - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" @@ -742,7 +737,7 @@ ipv4_mcroute_fdb_sep_rx() { adf_install_sg_sep_rx lo - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add \ @@ -754,7 +749,7 @@ ipv6_mcroute_fdb_sep_rx() { adf_install_sg_sep_rx "X$IPMR" - ip_addr_add "X$IPMR" 2001:db8:5::1/64 + adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64 vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute bridge -6 fdb del dev vx20 00:00:00:00:00:00 bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh index 46c31794b91b..709845123727 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -47,8 +47,7 @@ source lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/28 - defer simple_if_fini $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 tc qdisc add dev $h1 clsact defer tc qdisc del dev $h1 clsact @@ -60,24 +59,23 @@ h1_create() switch_create() { - ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 # Make sure the bridge uses the MAC address of the local port and not # that of the VxLAN's device. - ip_link_set_addr br1 $(mac_get $swp1) - ip_link_set_up br1 + adf_ip_link_set_addr br1 $(mac_get $swp1) + adf_ip_link_set_up br1 - ip_link_set_up $rp1 - ip_addr_add $rp1 192.0.2.17/28 - ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + adf_ip_link_set_up $rp1 + adf_ip_addr_add $rp1 192.0.2.17/28 + adf_ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 - ip_link_set_master $swp1 br1 - ip_link_set_up $swp1 + adf_ip_link_set_master $swp1 br1 + adf_ip_link_set_up $swp1 } vrp2_create() { - simple_if_init $rp2 192.0.2.18/28 - defer simple_if_fini $rp2 192.0.2.18/28 + adf_simple_if_init $rp2 192.0.2.18/28 } setup_prepare() @@ -88,11 +86,8 @@ setup_prepare() rp1=${NETIFS[p3]} rp2=${NETIFS[p4]} - vrf_prepare - defer vrf_cleanup - - forwarding_enable - defer forwarding_restore + adf_vrf_prepare + adf_forwarding_enable h1_create switch_create @@ -200,10 +195,10 @@ vxlan_ping_do() vxlan_device_add() { - ip_link_add vx1 up type vxlan id 1000 \ + adf_ip_link_add vx1 up type vxlan id 1000 \ local 192.0.2.17 dstport "$VXPORT" \ nolearning noudpcsum tos inherit ttl 100 "$@" - ip_link_set_master vx1 br1 + adf_ip_link_set_master vx1 br1 } vxlan_all_reserved_bits() diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index d5824eadea10..2b1d9f2b3e9e 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -93,6 +93,7 @@ static bool tx_socket = true; static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; +static bool ipip; static const int num_flush_id_cases = 6; static void vlog(const char *fmt, ...) @@ -114,7 +115,9 @@ static void setup_sock_filter(int fd) int ipproto_off, opt_ipproto_off; int next_off; - if (proto == PF_INET) + if (ipip) + next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol); + else if (proto == PF_INET) next_off = offsetof(struct iphdr, protocol); else next_off = offsetof(struct ipv6hdr, nexthdr); @@ -244,7 +247,7 @@ static void fill_datalinklayer(void *buf) eth->h_proto = ethhdr_proto; } -static void fill_networklayer(void *buf, int payload_len) +static void fill_networklayer(void *buf, int payload_len, int protocol) { struct ipv6hdr *ip6h = buf; struct iphdr *iph = buf; @@ -254,7 +257,7 @@ static void fill_networklayer(void *buf, int payload_len) ip6h->version = 6; ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->nexthdr = protocol; ip6h->hop_limit = 8; if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) error(1, errno, "inet_pton source ip6"); @@ -266,7 +269,7 @@ static void fill_networklayer(void *buf, int payload_len) iph->version = 4; iph->ihl = 5; iph->ttl = 8; - iph->protocol = IPPROTO_TCP; + iph->protocol = protocol; iph->tot_len = htons(sizeof(struct tcphdr) + payload_len + sizeof(struct iphdr)); iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ @@ -313,9 +316,19 @@ static void create_packet(void *buf, int seq_offset, int ack_offset, { memset(buf, 0, total_hdr_len); memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, payload_len, fin); - fill_networklayer(buf + ETH_HLEN, payload_len); + + if (ipip) { + fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr), + IPPROTO_IPIP); + fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr), + payload_len, IPPROTO_TCP); + } else { + fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP); + } + fill_datalinklayer(buf); } @@ -416,6 +429,13 @@ static void recompute_packet(char *buf, char *no_ext, int extlen) iph->tot_len = htons(ntohs(iph->tot_len) + extlen); iph->check = 0; iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + + if (ipip) { + iph += 1; + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } } else { ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); } @@ -670,7 +690,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(9); break; - case 3: /* DF=0, Fixed - should not coalesce */ + case 3: /* DF=0, Fixed - should coalesce */ iph1->frag_off &= ~htons(IP_DF); iph1->id = htons(8); @@ -777,7 +797,7 @@ static void send_fragment4(int fd, struct sockaddr_ll *daddr) */ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); - fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP); fill_datalinklayer(buf); iph->frag_off = htons(0x6000); // DF = 1, MF = 1 @@ -1071,7 +1091,7 @@ static void gro_sender(void) * and min ipv6hdr size. Like MAX_HDR_SIZE, * MAX_PAYLOAD is defined with the larger header of the two. */ - int offset = proto == PF_INET ? 20 : 0; + int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; send_large(txfd, &daddr, remainder); @@ -1188,10 +1208,9 @@ static void gro_receiver(void) correct_payload[0] = PAYLOAD_LEN * 2; check_recv_pkts(rxfd, correct_payload, 1); - printf("DF=0, Fixed - should not coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); + printf("DF=0, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); correct_payload[0] = PAYLOAD_LEN * 2; @@ -1222,7 +1241,7 @@ static void gro_receiver(void) check_recv_pkts(rxfd, correct_payload, 2); } } else if (strcmp(testname, "large") == 0) { - int offset = proto == PF_INET ? 20 : 0; + int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; correct_payload[0] = (MAX_PAYLOAD + offset); @@ -1251,6 +1270,7 @@ static void parse_args(int argc, char **argv) { "iface", required_argument, NULL, 'i' }, { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, + { "ipip", no_argument, NULL, 'e' }, { "rx", no_argument, NULL, 'r' }, { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' }, @@ -1260,7 +1280,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1270,6 +1290,11 @@ static void parse_args(int argc, char **argv) proto = PF_INET6; ethhdr_proto = htons(ETH_P_IPV6); break; + case 'e': + ipip = true; + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; case 'd': addr4_dst = addr6_dst = optarg; break; @@ -1305,7 +1330,10 @@ int main(int argc, char **argv) { parse_args(argc, argv); - if (proto == PF_INET) { + if (ipip) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET) { tcp_offset = ETH_HLEN + sizeof(struct iphdr); total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else if (proto == PF_INET6) { diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 9e3f186bc2a1..4c5144c6f652 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -4,7 +4,7 @@ readonly SERVER_MAC="aa:00:00:00:00:02" readonly CLIENT_MAC="aa:00:00:00:00:01" readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") -readonly PROTOS=("ipv4" "ipv6") +readonly PROTOS=("ipv4" "ipv6" "ipip") dev="" test="all" proto="ipv4" diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..267ef62b5c72 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs@librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet to + * localhost (::1) on the loopback interface from the autoconfigured link-local + * address. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <error.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "../kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +static void setup(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int ctl; + + /* we need to set MTU, so do this in a namespace to play nicely */ + if (unshare(CLONE_NEWNET) == -1) + error(KSFT_FAIL, errno, "unshare"); + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + error(KSFT_FAIL, errno, "socket"); + + /* ensure MTU is smaller than what we plan to send */ + ifr.ifr_mtu = MTU; + if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: set MTU"); + + /* bring up interface */ + if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS"); + ifr.ifr_flags = ifr.ifr_flags | IFF_UP; + if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: bring interface up"); + + if (close(ctl) == -1) + error(KSFT_FAIL, errno, "close"); +} + +int main(void) +{ + struct in6_addr addr = { + .s6_addr[15] = 0x01, /* ::1 */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = htons(9) /* port 9/udp (DISCARD) */ + }; + static char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int s; + + printf("Testing IPv6 fragmentation\n"); + setup(); + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) { + usleep(1000); + goto send_again; + } + error(KSFT_FAIL, errno, "sendmsg"); + } else if (rc != LARGER_THAN_MTU) { + error(KSFT_FAIL, errno, "sendmsg returned %zi, expected %i", + rc, LARGER_THAN_MTU); + } + printf("[PASS] sendmsg() returned %zi\n", rc); + if (close(s) == -1) + error(KSFT_FAIL, errno, "close"); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index c7add0dc4c60..feba4ef69a54 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -543,31 +543,31 @@ require_command() fi } -ip_link_add() +adf_ip_link_add() { local name=$1; shift - ip link add name "$name" "$@" - defer ip link del dev "$name" + ip link add name "$name" "$@" && \ + defer ip link del dev "$name" } -ip_link_set_master() +adf_ip_link_set_master() { local member=$1; shift local master=$1; shift - ip link set dev "$member" master "$master" - defer ip link set dev "$member" nomaster + ip link set dev "$member" master "$master" && \ + defer ip link set dev "$member" nomaster } -ip_link_set_addr() +adf_ip_link_set_addr() { local name=$1; shift local addr=$1; shift local old_addr=$(mac_get "$name") - ip link set dev "$name" address "$addr" - defer ip link set dev "$name" address "$old_addr" + ip link set dev "$name" address "$addr" && \ + defer ip link set dev "$name" address "$old_addr" } ip_link_has_flag() @@ -585,44 +585,44 @@ ip_link_is_up() ip_link_has_flag "$1" UP } -ip_link_set_up() +adf_ip_link_set_up() { local name=$1; shift if ! ip_link_is_up "$name"; then - ip link set dev "$name" up - defer ip link set dev "$name" down + ip link set dev "$name" up && \ + defer ip link set dev "$name" down fi } -ip_link_set_down() +adf_ip_link_set_down() { local name=$1; shift if ip_link_is_up "$name"; then - ip link set dev "$name" down - defer ip link set dev "$name" up + ip link set dev "$name" down && \ + defer ip link set dev "$name" up fi } -ip_addr_add() +adf_ip_addr_add() { local name=$1; shift - ip addr add dev "$name" "$@" - defer ip addr del dev "$name" "$@" + ip addr add dev "$name" "$@" && \ + defer ip addr del dev "$name" "$@" } -ip_route_add() +adf_ip_route_add() { - ip route add "$@" - defer ip route del "$@" + ip route add "$@" && \ + defer ip route del "$@" } -bridge_vlan_add() +adf_bridge_vlan_add() { - bridge vlan add "$@" - defer bridge vlan del "$@" + bridge vlan add "$@" && \ + defer bridge vlan del "$@" } wait_local_port_listen() @@ -645,3 +645,27 @@ wait_local_port_listen() sleep 0.1 done } + +cmd_jq() +{ + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + output="$($cmd)" + # it the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ ! -z "$output" ] +} diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 02be28dcc089..997b85cc216a 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily -from .ynl import NetshaperFamily, DevlinkFamily +from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 8e35ed12ed9e..83b1574f7719 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -72,6 +72,11 @@ def ksft_true(a, comment=""): _fail("Check failed", a, "does not eval to True", comment) +def ksft_not_none(a, comment=""): + if a is None: + _fail("Check failed", a, "is None", comment) + + def ksft_in(a, b, comment=""): if a not in b: _fail("Check failed", a, "not in", b, comment) @@ -92,6 +97,11 @@ def ksft_ge(a, b, comment=""): _fail("Check failed", a, "<", b, comment) +def ksft_gt(a, b, comment=""): + if a <= b: + _fail("Check failed", a, "<=", b, comment) + + def ksft_lt(a, b, comment=""): if a >= b: _fail("Check failed", a, ">=", b, comment) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index f395c90fb0f1..cb40ecef9456 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -import errno import json as _json import os -import random import re import select import socket @@ -21,17 +19,19 @@ def fd_read_timeout(fd, timeout): rlist, _, _ = select.select([fd], [], [], timeout) if rlist: return os.read(fd, 1024) - else: - raise TimeoutError("Timeout waiting for fd read") + raise TimeoutError("Timeout waiting for fd read") class cmd: """ Execute a command on local or remote host. + @shell defaults to false, and class will try to split @comm into a list + if it's a string with spaces. + Use bkg() instead to run a command in the background. """ - def __init__(self, comm, shell=True, fail=True, ns=None, background=False, + def __init__(self, comm, shell=None, fail=True, ns=None, background=False, host=None, timeout=5, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm @@ -45,6 +45,10 @@ class cmd: if host: self.proc = host.cmd(comm) else: + # If user doesn't explicitly request shell try to avoid it. + if shell is None and isinstance(comm, str) and ' ' in comm: + comm = comm.split() + # ksft_wait lets us wait for the background process to fully start, # we pass an FD to the child process, and wait for it to write back. # Similarly term_fd tells child it's time to exit. @@ -111,12 +115,13 @@ class bkg(cmd): with bkg("my_binary", ksft_wait=5): """ - def __init__(self, comm, shell=True, fail=None, ns=None, host=None, + def __init__(self, comm, shell=None, fail=None, ns=None, host=None, exit_wait=False, ksft_wait=None): super().__init__(comm, background=True, shell=shell, fail=fail, ns=ns, host=host, ksft_wait=ksft_wait) self.terminate = not exit_wait and not ksft_wait + self._exit_wait = exit_wait self.check_fail = fail if shell and self.terminate: @@ -127,7 +132,9 @@ class bkg(cmd): return self def __exit__(self, ex_type, ex_value, ex_tb): - return self.process(terminate=self.terminate, fail=self.check_fail) + # Force termination on exception + terminate = self.terminate or (self._exit_wait and ex_type) + return self.process(terminate=terminate, fail=self.check_fail) global_defer_queue = [] @@ -135,8 +142,6 @@ global_defer_queue = [] class defer: def __init__(self, func, *args, **kwargs): - global global_defer_queue - if not callable(func): raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") @@ -224,11 +229,11 @@ def bpftrace(expr, json=None, ns=None, host=None, timeout=None): return cmd_obj -def rand_port(type=socket.SOCK_STREAM): +def rand_port(stype=socket.SOCK_STREAM): """ Get a random unprivileged port. """ - with socket.socket(socket.AF_INET6, type) as s: + with socket.socket(socket.AF_INET6, stype) as s: s.bind(("", 0)) return s.getsockname()[1] @@ -249,3 +254,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin if time.monotonic() > end: raise Exception("Waiting for port listen timed out") time.sleep(sleep) + + +def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'): + """ + Wait for file contents on the local system to satisfy a condition. + test_fn() should take one argument (file contents) and return whether + condition is met. + """ + end = time.monotonic() + deadline + + with open(fname, "r", encoding=encoding) as fp: + while True: + if test_fn(fp.read()): + break + fp.seek(0) + if time.monotonic() > end: + raise TimeoutError("Wait for file contents failed", fname) + time.sleep(sleep) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 2b3a61ea3bfa..32c223e93b2c 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -61,3 +61,8 @@ class DevlinkFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), schema='', recv_size=recv_size) + +class PSPFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('psp.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh index 082f5d38321b..47ab78c4d465 100644 --- a/tools/testing/selftests/net/lib/sh/defer.sh +++ b/tools/testing/selftests/net/lib/sh/defer.sh @@ -1,6 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Whether to pause and allow debugging when an executed deferred command has a +# non-zero exit code. +: "${DEFER_PAUSE_ON_FAIL:=no}" + # map[(scope_id,track,cleanup_id) -> cleanup_command] # track={d=default | p=priority} declare -A __DEFER__JOBS @@ -38,8 +42,20 @@ __defer__run() local track=$1; shift local defer_ix=$1; shift local defer_key=$(__defer__defer_key $track $defer_ix) + local ret + + eval ${__DEFER__JOBS[$defer_key]} + ret=$? + + if [[ "$DEFER_PAUSE_ON_FAIL" == yes && "$ret" -ne 0 ]]; then + echo "Deferred command (track $track index $defer_ix):" + echo " ${__DEFER__JOBS[$defer_key]}" + echo "... ended with an exit status of $ret" + echo "Hit enter to continue, 'q' to quit" + read a + [[ "$a" == q ]] && exit 1 + fi - ${__DEFER__JOBS[$defer_key]} unset __DEFER__JOBS[$defer_key] } @@ -49,7 +65,7 @@ __defer__schedule() local ndefers=$(__defer__ndefers $track) local ndefers_key=$(__defer__ndefer_key $track) local defer_key=$(__defer__defer_key $track $ndefers) - local defer="$@" + local defer="${@@Q}" __DEFER__JOBS[$defer_key]="$defer" __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1)) diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index df4eea5c192b..c368fc045f4b 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -420,7 +420,6 @@ static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) { - void *data = (void *)(long)ctx->data; struct udphdr *udph = NULL; __s32 *adjust_offset, *val; __u32 key, hdr_len; @@ -432,7 +431,8 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) if (!udph) return XDP_PASS; - hdr_len = (void *)udph - data + sizeof(struct udphdr); + hdr_len = (void *)udph - (void *)(long)ctx->data + + sizeof(struct udphdr); key = XDP_ADJST_OFFSET; adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); if (!adjust_offset) @@ -572,8 +572,6 @@ static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; struct udphdr *udph_ptr = NULL; __u32 key, size, hdr_len; __s32 *val; @@ -584,7 +582,8 @@ static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) if (!udph_ptr) return XDP_PASS; - hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr); + hdr_len = (void *)udph_ptr - (void *)(long)ctx->data + + sizeof(struct udphdr); key = XDP_ADJST_OFFSET; val = bpf_map_lookup_elem(&map_xdp_setup, &key); diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index f3bcaa48df8f..8e8f6441ad8b 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -502,6 +502,7 @@ static int server(int unixfd) process_one_client(r, unixfd); + close(fd); return 0; } @@ -580,8 +581,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(unixfds[1]); + if (s == 0) { + close(unixfds[0]); + ret = server(unixfds[1]); + close(unixfds[1]); + return ret; + } close(unixfds[1]); diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7fd555b123b9..c90d8e8b95cb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -74,6 +74,17 @@ unset join_create_err unset join_bind_err unset join_connect_err +unset fb_ns1 +unset fb_ns2 +unset fb_infinite_map_tx +unset fb_dss_corruption +unset fb_simult_conn +unset fb_mpc_passive +unset fb_mpc_active +unset fb_mpc_data +unset fb_md5_sig +unset fb_dss + # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, @@ -347,6 +358,7 @@ reset_with_add_addr_timeout() tables="${ip6tables}" fi + # set a maximum, to avoid too long timeout with exponential backoff ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ @@ -1399,6 +1411,115 @@ chk_join_tx_nr() print_results "join Tx" ${rc} } +chk_fallback_nr() +{ + local infinite_map_tx=${fb_infinite_map_tx:-0} + local dss_corruption=${fb_dss_corruption:-0} + local simult_conn=${fb_simult_conn:-0} + local mpc_passive=${fb_mpc_passive:-0} + local mpc_active=${fb_mpc_active:-0} + local mpc_data=${fb_mpc_data:-0} + local md5_sig=${fb_md5_sig:-0} + local dss=${fb_dss:-0} + local rc=${KSFT_PASS} + local ns=$1 + local count + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$infinite_map_tx" ]; then + rc=${KSFT_FAIL} + print_check "$ns infinite map tx fallback" + fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss_corruption" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss corruption fallback" + fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$simult_conn" ]; then + rc=${KSFT_FAIL} + print_check "$ns simult conn fallback" + fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_passive" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc passive fallback" + fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_active" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc active fallback" + fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_data" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc data fallback" + fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$md5_sig" ]; then + rc=${KSFT_FAIL} + print_check "$ns MD5 Sig fallback" + fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss fallback" + fail_test "got $count dss fallback[s] in $ns expected $dss" + fi + + return $rc +} + +chk_fallback_nr_all() +{ + local netns=("ns1" "ns2") + local fb_ns=("fb_ns1" "fb_ns2") + local rc=${KSFT_PASS} + + for i in 0 1; do + if [ -n "${!fb_ns[i]}" ]; then + eval "${!fb_ns[i]}" \ + chk_fallback_nr ${netns[i]} || rc=${?} + else + chk_fallback_nr ${netns[i]} || rc=${?} + fi + done + + if [ "${rc}" != "${KSFT_PASS}" ]; then + print_results "fallback" ${rc} + fi +} + chk_join_nr() { local syn_nr=$1 @@ -1484,6 +1605,8 @@ chk_join_nr() join_syn_tx="${join_syn_tx:-${syn_nr}}" \ chk_join_tx_nr + chk_fallback_nr_all + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1547,7 +1670,6 @@ chk_add_nr() local tx="" local rx="" local count - local timeout if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 @@ -1556,15 +1678,13 @@ chk_add_nr() rx=" server" fi - timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) - print_check "add addr rx${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_nr" ] && + { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then fail_test "got $count ADD_ADDR[s] expected $add_nr" else print_ok @@ -1652,18 +1772,15 @@ chk_add_tx_nr() { local add_tx_nr=$1 local echo_tx_nr=$2 - local timeout local count - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - print_check "add addr tx" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_tx_nr" ] && + { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" else print_ok @@ -2151,7 +2268,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 3 3 fi @@ -2163,7 +2281,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 join_syn_tx=3 \ chk_join_nr 1 1 1 chk_add_nr 3 3 @@ -2201,6 +2320,74 @@ signal_address_tests() fi } +laminar_endp_tests() +{ + # no laminar endpoints: routing rules are used + if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # laminar endpoints: this endpoint is used + if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # laminar endpoints: these endpoints are used + if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 dead:beef:3::2 flags laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + pm_nl_add_endpoint $ns2 10.0.4.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + fi + + # laminar endpoints: only one endpoint is used + if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 2 2 + fi + + # laminar endpoints: subflow and laminar flags + if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi +} + link_failure_tests() { # accept and use add_addr with additional subflows and link loss @@ -3187,6 +3374,17 @@ deny_join_id0_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 fi + + # default limits, server deny join id 0 + signal + if reset_with_allow_join_id0 "default limits, server deny join id 0" 0 1; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi } fullmesh_tests() @@ -3337,6 +3535,7 @@ fail_tests() join_csum_ns1=+1 join_csum_ns2=+0 \ join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ join_corrupted_pkts="$(pedit_action_pkts)" \ + fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \ chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi @@ -3978,6 +4177,7 @@ all_tests_sorted=( f@subflows_tests e@subflows_error_tests s@signal_address_tests + L@laminar_endp_tests l@link_failure_tests t@add_addr_timeout_tests r@remove_tests diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 112c07c4c37a..286164f7246e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -726,6 +726,7 @@ static int server(int pipefd) process_one_client(r, pipefd); + close(fd); return 0; } @@ -851,8 +852,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(pipefds[1]); + if (s == 0) { + close(pipefds[0]); + ret = server(pipefds[1]); + close(pipefds[1]); + return ret; + } close(pipefds[1]); diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 93fea3442216..65b374232ff5 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -2,6 +2,7 @@ #include <errno.h> #include <error.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -113,6 +114,8 @@ static int capture_events(int fd, int event_group) error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group"); do { + bool server_side = false; + FD_ZERO(&rfds); FD_SET(fd, &rfds); res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) + @@ -187,18 +190,22 @@ static int capture_events(int fd, int event_group) else if (attrs->rta_type == MPTCP_ATTR_ERROR) fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs)); else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE) - fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs)); + server_side = !!*(__u8 *)RTA_DATA(attrs); else if (attrs->rta_type == MPTCP_ATTR_FLAGS) { __u16 flags = *(__u16 *)RTA_DATA(attrs); /* only print when present, easier */ if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0) fprintf(stderr, ",deny_join_id0:1"); + if (flags & MPTCP_PM_EV_FLAG_SERVER_SIDE) + server_side = true; } attrs = RTA_NEXT(attrs, msg_len); } } + if (server_side) + fprintf(stderr, ",server_side:1"); fprintf(stderr, "\n"); } while (1); @@ -823,6 +830,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; else if (!strcmp(tok, "signal")) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; + else if (!strcmp(tok, "laminar")) + flags |= MPTCP_PM_ADDR_FLAG_LAMINAR; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else if (!strcmp(tok, "fullmesh")) @@ -1011,6 +1020,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { + printf("laminar"); + flags &= ~MPTCP_PM_ADDR_FLAG_LAMINAR; + if (flags) + printf(","); + } + if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) { printf("backup"); flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3d45991f24ed..87323942cb8a 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -241,7 +241,7 @@ make_connection() print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] && - [ "${client_serverside}" = 0 ] && [ "${server_serverside}" = 1 ] && + [ "${client_serverside:-0}" = 0 ] && [ "${server_serverside:-0}" = 1 ] && [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ] then test_pass diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 79d5b33966ba..305e46b819cb 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -13,6 +13,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_INET_ESP=m +CONFIG_CRYPTO_SHA1=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP_NF_IPTABLES=m diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 20e76b395c85..ad97c6227f35 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -408,6 +408,18 @@ perf_duration 0 " +TYPE_doublecreate=" +display cannot create same element twice +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1900,6 +1912,48 @@ test_bug_avx2_mismatch() fi } +test_bug_doublecreate() +{ + local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4" + local ret=1 + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + add "{ $elements }" || return 1 + # expected to work: 'add' on existing should be no-op. + add "{ $elements }" || return 1 + + # 'create' should return an error. + if nft create element inet filter test "{ $elements }" 2>/dev/null; then + err "Could create an existing element" + return 1 + fi +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -eq 0 ]; then + err "Could create element twice in one transaction" + err "$(nft -a list ruleset)" + return 1 + fi + +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -ne 0 ]; then + err "Could not flush and re-create element in one transaction" + return 1 + fi + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh index a954754b99b3..b3ec2d0a3f56 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat.sh @@ -569,7 +569,7 @@ test_redirect6() ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then - echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6" + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" lret=1 fi @@ -859,7 +859,7 @@ EOF # from router:service bypass connection tracking. test_port_shadow_notrack "$family" - # test nat based mitigation: fowarded packets coming from service port + # test nat based mitigation: forwarded packets coming from service port # are masqueraded with random highport. test_port_shadow_pat "$family" diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c index 07423f256f96..7618ebe528a4 100644 --- a/tools/testing/selftests/net/netlink-dumps.c +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -31,9 +31,18 @@ struct ext_ack { const char *str; }; -/* 0: no done, 1: done found, 2: extack found, -1: error */ -static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) +enum get_ea_ret { + ERROR = -1, + NO_CTRL = 0, + FOUND_DONE, + FOUND_ERR, + FOUND_EXTACK, +}; + +static enum get_ea_ret +nl_get_extack(char *buf, size_t n, struct ext_ack *ea) { + enum get_ea_ret ret = NO_CTRL; const struct nlmsghdr *nlh; const struct nlattr *attr; ssize_t rem; @@ -41,15 +50,19 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) { nlh = (struct nlmsghdr *)&buf[n - rem]; if (!NLMSG_OK(nlh, rem)) - return -1; + return ERROR; - if (nlh->nlmsg_type != NLMSG_DONE) + if (nlh->nlmsg_type == NLMSG_ERROR) + ret = FOUND_ERR; + else if (nlh->nlmsg_type == NLMSG_DONE) + ret = FOUND_DONE; + else continue; ea->err = -*(int *)NLMSG_DATA(nlh); if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) - return 1; + return ret; ynl_attr_for_each(attr, nlh, sizeof(int)) { switch (ynl_attr_type(attr)) { @@ -68,10 +81,10 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) } } - return 2; + return FOUND_EXTACK; } - return 0; + return ret; } static const struct { @@ -99,9 +112,9 @@ static const struct { TEST(dump_extack) { int netlink_sock; + int i, cnt, ret; char buf[8192]; int one = 1; - int i, cnt; ssize_t n; netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); @@ -118,7 +131,7 @@ TEST(dump_extack) ASSERT_EQ(n, 0); /* Dump so many times we fill up the buffer */ - cnt = 64; + cnt = 80; for (i = 0; i < cnt; i++) { n = send(netlink_sock, &dump_neigh_bad, sizeof(dump_neigh_bad), 0); @@ -140,10 +153,20 @@ TEST(dump_extack) } ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); - EXPECT_EQ(nl_get_extack(buf, n, &ea), 2); + ret = nl_get_extack(buf, n, &ea); + /* Once we fill the buffer we'll see one ENOBUFS followed + * by a number of EBUSYs. Then the last recv() will finally + * trigger and complete the dump. + */ + if (ret == FOUND_ERR && (ea.err == ENOBUFS || ea.err == EBUSY)) + continue; + EXPECT_EQ(ret, FOUND_EXTACK); + EXPECT_EQ(ea.err, EINVAL); EXPECT_EQ(ea.attr_offs, sizeof(struct nlmsghdr) + sizeof(struct ndmsg)); } + /* Make sure last message was a full DONE+extack */ + EXPECT_EQ(ret, FOUND_EXTACK); } static const struct { diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 8a0396bfaf99..b521e0dea506 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket): elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE: up.execute(msg) else: - print("Unkonwn cmd: %d" % msg["cmd"]) + print("Unknown cmd: %d" % msg["cmd"]) except NetlinkError as ne: raise ne diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c index 9201f2905f2c..688a5fa6fdac 100644 --- a/tools/testing/selftests/net/ovpn/ovpn-cli.c +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -32,9 +32,10 @@ #include <sys/socket.h> +#include "../../kselftest.h" + /* defines to make checkpatch happy */ #define strscpy strncpy -#define __always_unused __attribute__((__unused__)) /* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we * have to explicitly do it to prevent the kernel from failing upon diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh index 1095a7b22f44..37edd3dc3b07 100755 --- a/tools/testing/selftests/net/packetdrill/defaults.sh +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -51,7 +51,8 @@ sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 -sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen=0x3 +# Use TFO_COOKIE in ksft_runner.sh for this key. sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 sysctl -q net.ipv4.tcp_syncookies=1 diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index a7e790af38ff..b34e5cf0112e 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -3,21 +3,26 @@ source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" -readonly ipv4_args=('--ip_version=ipv4 ' - '--local_ip=192.168.0.1 ' - '--gateway_ip=192.168.0.1 ' - '--netmask_ip=255.255.0.0 ' - '--remote_ip=192.0.2.1 ' - '-D CMSG_LEVEL_IP=SOL_IP ' - '-D CMSG_TYPE_RECVERR=IP_RECVERR ') - -readonly ipv6_args=('--ip_version=ipv6 ' - '--mtu=1520 ' - '--local_ip=fd3d:0a0b:17d6::1 ' - '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' - '--remote_ip=fd3d:fa7b:d17d::1 ' - '-D CMSG_LEVEL_IP=SOL_IPV6 ' - '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') +declare -A ip_args=( + [ipv4]="--ip_version=ipv4 + --local_ip=192.168.0.1 + --gateway_ip=192.168.0.1 + --netmask_ip=255.255.0.0 + --remote_ip=192.0.2.1 + -D TFO_COOKIE=3021b9d889017eeb + -D TFO_COOKIE_ZERO=b7c12350a90dc8f5 + -D CMSG_LEVEL_IP=SOL_IP + -D CMSG_TYPE_RECVERR=IP_RECVERR" + [ipv6]="--ip_version=ipv6 + --mtu=1520 + --local_ip=fd3d:0a0b:17d6::1 + --gateway_ip=fd3d:0a0b:17d6:8888::1 + --remote_ip=fd3d:fa7b:d17d::1 + -D TFO_COOKIE=c1d1e9742a47a9bc + -D TFO_COOKIE_ZERO=82af1a8f9a205c34 + -D CMSG_LEVEL_IP=SOL_IPV6 + -D CMSG_TYPE_RECVERR=IPV6_RECVERR" +) if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0 <script>" @@ -38,12 +43,20 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then failfunc=ktap_test_xfail fi +ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2) +if [[ -z $ip_versions ]]; then + ip_versions="ipv4 ipv6" +elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then + ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions" + exit "$KSFT_FAIL" +fi + ktap_print_header -ktap_set_plan 2 +ktap_set_plan $(echo $ip_versions | wc -w) -unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \ - && ktap_test_pass "ipv4" || $failfunc "ipv4" -unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \ - && ktap_test_pass "ipv6" || $failfunc "ipv6" +for ip_version in $ip_versions; do + unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass $ip_version || $failfunc $ip_version +done ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt new file mode 100644 index 000000000000..eef01d5f1118 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +0 < . 1:1001(1000) ack 1 win 32792 + +0 > . 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 + +// resend the payload + a FIN + +0 < F. 1:1001(1000) ack 1 win 32792 +// Why do we have a delay and no dsack ? + +0~+.04 > . 1:1(0) ack 1002 + + +0 close(4) = 0 + +// According to RFC 2525, section 2.17 +// we should _not_ send an RST here, because there was no data to consume. + +0 > F. 1:1(0) ack 1002 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt new file mode 100644 index 000000000000..32aff9bc4052 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_COOKIE_NOT_REQD flag on receiving +// SYN with data but without Fast Open cookie option. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x202` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Since TFO_SERVER_COOKIE_NOT_REQD, a TFO socket will be created with +// the data accepted. + +0 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 read(4, ..., 1024) = 1000 + +// Data After SYN will be accepted too. + +0 < . 1001:2001(1000) ack 1 win 5840 + +0 > . 1:1(0) ack 2001 + +// Should change the implementation later to set the SYN flag as well. + +0 read(4, ..., 1024) = 1000 + +0 write(4, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 2001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt new file mode 100644 index 000000000000..649997a58099 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_WO_SOCKOPT1 without setsockopt(TCP_FASTOPEN) + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x402` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt new file mode 100644 index 000000000000..4a00e0d994f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Server w/o TCP_FASTOPEN socket option + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,FO TFO_COOKIE> + +// Data is ignored since TCP_FASTOPEN is not set on the listener + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// The above should block until ack comes in below. + +0 < . 1:31(30) ack 1 win 5840 + +0 accept(3, ..., ...) = 4 + + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 read(4, ..., 512) = 30 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt new file mode 100644 index 000000000000..345ed26ff7f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test that TFO-enabled server would not respond SYN-ACK with any TFO option +// when receiving a pure SYN-data. It should respond a pure SYN-ack. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN with ECN disabled because this has happened in reality + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN w/ ECN enabled + +0 `sysctl -q net.ipv4.tcp_ecn=2` + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > SE. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt new file mode 100644 index 000000000000..98e6f84497cd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO server with SYN that has TFO cookie and data. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt new file mode 100644 index 000000000000..95b1047ffdd5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test zero-payload packet w/ valid TFO cookie - a TFO socket will +// still be created and accepted but read() will not return until a +// later pkt with 10 byte. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +// A TFO socket is created and is writable. + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 1 + +0...0.300 read(4, ..., 512) = 10 + +.3 < P. 1:11(10) ack 1 win 5840 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt new file mode 100644 index 000000000000..f75efd51ed0c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// A reproducer case for a TFO SYNACK RTO undo bug in: +// 794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit") +// This sequence that tickles this bug is: +// - Fast Open server receives TFO SYN with data, sends SYNACK +// - (client receives SYNACK and sends ACK, but ACK is lost) +// - server app sends some data packets +// - (N of the first data packets are lost) +// - server receives client ACK that has a TS ECR matching first SYNACK, +// and also SACKs suggesting the first N data packets were lost +// - server performs undo of SYNACK RTO, then immediately enters recovery +// - buggy behavior in 794200d66273 then performed an undo that caused +// the connection to be in a bad state, in CA_Open with retrans_out != 0 + +// Check that outbound TS Val ticks are as we would expect with 1000 usec per +// timestamp tick: +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:1000(1000) win 65535 <mss 1012,sackOK,TS val 1000 ecr 0,wscale 7,nop,nop,nop,FO TFO_COOKIE> + +0 > S. 0:0(0) ack 1001 <mss 1460,sackOK,TS val 2000 ecr 1000,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +// Application writes more data + +.010 write(4, ..., 10000) = 10000 + +0 > P. 1:5001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 > P. 5001:10001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1010 ecr 2000,sack 2001:5001> + +0 > P. 1:2001(2000) ack 1001 <nop,nop,TS val 2010 ecr 1010> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1011 ecr 2000,sack 2001:6001> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt new file mode 100644 index 000000000000..c3cb0e8bdcf8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the Experimental Option +// +// SYN w/ FOEXP w/o cookie must generates SYN+ACK w/ FOEXP +// w/ a valid cookie, and the cookie must be the same one +// with one generated by IANA FO + +`./defaults.sh` + +// Request a TFO cookie by Experimental Option +// This must generate the same TFO_COOKIE + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,FOEXP TFO_COOKIE> + + +0 close(3) = 0 + +// Test if FOEXP with a valid cookie creates a TFO socket + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP TFO_COOKIE> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt new file mode 100644 index 000000000000..dc09f8d9a381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a FIN pkt with the ACK bit to a TFO socket. +// The socket will go to TCP_CLOSE_WAIT state and data can be +// read until the socket is closed, at which time a FIN will be sent. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// FIN is acked and the socket goes to TCP_CLOSE_WAIT state +// in tcp_fin() called from tcp_data_queue(). + +0 < F. 11:11(0) ack 1 win 32792 + +0 > . 1:1(0) ack 12 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE_WAIT, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 close(4) = 0 + +0 > F. 1:1(0) ack 12 + * > F. 1:1(0) ack 12 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt new file mode 100644 index 000000000000..d5543672e2bd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send an ICMP host_unreachable pkt to a pending SYN_RECV req. +// +// If it's a TFO req, the ICMP error will cause it to switch +// to TCP_CLOSE state but remains in the acceptor queue. + +--ip_version=ipv4 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// Out-of-window icmp is ignored but accounted. + +0 `nstat > /dev/null` + +0 < icmp unreachable [5000:6000(1000)] + +0 `nstat | grep TcpExtOutOfWindowIcmps > /dev/null` + +// Valid ICMP unreach. + +0 < icmp unreachable host_unreachable [0:10(10)] + +// Unlike the non-TFO case, the req is still there to be accepted. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +// tcp_done_with_error() in tcp_v4_err() sets sk->sk_state +// to TCP_CLOSE + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// The 1st read will succeed and return the data in SYN + +0 read(4, ..., 512) = 10 + +// The 2nd read will fail. + +0 read(4, ..., 512) = -1 EHOSTUNREACH (No route to host) + +// But is no longer writable because it's in TCP_CLOSE state. + +0 write(4, ..., 100) = -1 EPIPE (Broken Pipe) + +// inbound pkt will trigger RST because the socket has been moved +// off the TCP hash tables. + +0 < . 1:1(0) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt new file mode 100644 index 000000000000..040d5547ed80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it has been accepted. +// +// First read() will return all the data and this is consistent +// with the non-TFO case. Second read will return -1 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// 1st read will return the data from SYN. +// tcp_reset() sets sk->sk_err to ECONNRESET for SYN_RECV. + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 32792 +// See Step 2 in tcp_validate_incoming(). + +// found_ok_skb in tcp_recvmsg_locked() + +0 read(4, ..., 512) = 10 + +// !copied && sk->sk_err -> sock_error(sk) + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt new file mode 100644 index 000000000000..7f9de6c66cbd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket before it is accepted. +// +// The socket won't go away and after it's accepted the data +// in the SYN pkt can still be read. But that's about all that +// the acceptor can do with the socket. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// 1st read will return the data from SYN. + +0 < R. 11:11(0) win 257 + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 257 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt new file mode 100644 index 000000000000..548a87701b5d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it is accepted. +// +// The socket will change to TCP_CLOSE state with pending data so +// write() will fail. Pending data can be still be read and close() +// won't trigger RST if data is not read +// +// 565b7b2d2e63 ("tcp: do not send reset to already closed sockets") +// https://lore.kernel.org/netdev/4C1A2502.1030502@openvz.org/ + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop, FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// tcp_done() sets sk->sk_state to TCP_CLOSE and clears tp->fastopen_rsk + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt new file mode 100644 index 000000000000..20090bf77655 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a fully established socket with pending data before +// it is accepted. +// +// The socket with pending data won't go away and can still be accepted +// with data read. But it will be in TCP_CLOSE state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Invalid cookie, so accept() fails. + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO aaaaaaaaaaaaaaaa,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK, FO TFO_COOKIE,nop,nop> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// Complete 3WHS and send data and RST + +0 < . 1:1(0) ack 1 win 32792 + +0 < . 1:11(10) ack 1 win 32792 + +0 < R. 11:11(0) win 32792 + +// A valid reset won't make the fully-established socket go away. +// It's just that the acceptor will get a dead, unusable socket +// in TCP_CLOSE state. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt new file mode 100644 index 000000000000..9f52d7de3436 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the server cookie is generated by aes64 encoding of remote and local +// IP addresses with a master key specified via sockopt TCP_FASTOPEN_KEY +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen_key=00000000-00000000-00000000-00000000` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +// Set a key of a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 (big endian). +// This would produce a cookie of TFO_COOKIE like many other +// tests (which the same key but set via sysctl). + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Request a valid cookie TFO_COOKIE + +0 < S 1428932:1428942(10) win 10000 <mss 1012,nop,nop,FO,sackOK,TS val 1 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 1428933 <mss 1460,sackOK,TS val 10000 ecr 1,nop,wscale 8,FO TFO_COOKIE,nop,nop> + +0 < . 1:1(0) ack 1 win 257 <nop,nop,TS val 2 ecr 10000> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 10001 ecr 2> + +0 < F. 1:1(0) ack 2 win 257 <nop,nop,TS val 3 ecr 10001> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 10002 ecr 3> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test setting the key in the listen state, and produces an identical cookie + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6815001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 < . 1001:1001(0) ack 1 win 257 <nop,nop,TS val 12 ecr 10000> + +0 read(4, ..., 8192) = 1000 + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1001 <nop,nop,TS val 10101 ecr 12> + +0 < F. 1001:1001(0) ack 2 win 257 <nop,nop,TS val 112 ecr 10101> + +0 > . 2:2(0) ack 1002 <nop,nop,TS val 10102 ecr 112> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test invalid key length (must be 16 bytes) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 0) = -1 (Invalid Argument) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 3) = -1 (Invalid Argument) + +// Previous cookie won't be accepted b/c this listener uses the global key (0-0-0-0) + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6814001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8,FO TFO_COOKIE_ZERO,nop,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt new file mode 100644 index 000000000000..e82e06da44c9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a listener socket with pending TFO child. +// This will trigger RST pkt to go out. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// RST pkt is generated for each not-yet-accepted TFO child. +// inet_csk_listen_stop() -> inet_child_forget() -> tcp_disconnect() +// -> tcp_need_reset() is true for SYN_RECV + +0 close(3) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt index 26794e7ddfd5..2a148bb14cbf 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt @@ -1,26 +1,30 @@ // SPDX-License-Identifier: GPL-2.0 `./defaults.sh - ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x602 /proc/sys/net/ipv4/tcp_timestamps=0` + ./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 - +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK> + +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK,nop,nop,FO TFO_COOKIE> +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK> // sk->sk_state is TCP_SYN_RECV - +.1 accept(3, ..., ...) = 4 + +0 accept(3, ..., ...) = 4 + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% // tcp_disconnect() sets sk->sk_state to TCP_CLOSE +0 connect(4, AF_UNSPEC, ...) = 0 +0 > R. 1:1(0) ack 11 win 65535 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% // connect() sets sk->sk_state to TCP_SYN_SENT +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress) +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 %{ assert tcpi_state == TCP_SYN_SENT, tcpi_state }% // tp->fastopen_rsk must be NULL +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt new file mode 100644 index 000000000000..09fb63f78a0e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a TFO socket with unread data. +// This will trigger a RST pkt. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// data_was_unread == true in __tcp_close() + +0 close(4) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 88e914c4eef9..a3323c21f001 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1089,10 +1089,11 @@ cleanup() { cleanup_all_ns - ip link del veth_A-C 2>/dev/null - ip link del veth_A-R1 2>/dev/null - cleanup_del_ovs_internal - cleanup_del_ovs_vswitchd + [ -e "/sys/class/net/veth_A-C" ] && ip link del veth_A-C + [ -e "/sys/class/net/veth_A-R1" ] && ip link del veth_A-R1 + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_internal + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_vswitchd + rm -f "$tmpoutfile" } diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index 6e4fef560873..067265b0a554 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -22,10 +22,6 @@ #define PORT_BASE 8000 -#ifndef __maybe_unused -# define __maybe_unused __attribute__ ((__unused__)) -#endif - static __maybe_unused void pair_udp_setfilter(int fd) { /* the filter below checks for all of the following conditions that diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 221270cee3ea..2938045c5cf9 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -22,6 +22,7 @@ * - TPACKET_V3: RX_RING */ +#undef NDEBUG #include <stdio.h> #include <stdlib.h> #include <sys/types.h> @@ -33,7 +34,6 @@ #include <ctype.h> #include <fcntl.h> #include <unistd.h> -#include <bits/wordsize.h> #include <net/ethernet.h> #include <netinet/ip.h> #include <arpa/inet.h> @@ -785,7 +785,7 @@ static int test_kernel_bit_width(void) static int test_user_bit_width(void) { - return __WORDSIZE; + return sizeof(long) * 8; } static const char *tpacket_str[] = { diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh new file mode 100755 index 000000000000..2db01ece0cc1 --- /dev/null +++ b/tools/testing/selftests/net/route_hint.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test ensures directed broadcast routes use dst hint mechanism + +source lib.sh + +CLIENT_IP4="192.168.0.1" +SERVER_IP4="192.168.0.2" +BROADCAST_ADDRESS="192.168.0.255" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1 + + ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off + ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout" + ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs" + ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +directed_bcast_hint_test() +{ + local rc=0 + + echo "Testing for directed broadcast route hint" + + orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \ + -B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q + sleep 1 + new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + + res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc) + + if [ "${res}" -lt 100 ]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected in_brd to be under 100, got ${res}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v bc)" ]; then + echo "SKIP: Could not run test without bc tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup + +directed_bcast_hint_test +exit $? diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index 4287a8529890..b200019b3c80 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -54,16 +54,16 @@ cleanup echo 1 > /proc/sys/net/core/rps_default_mask setup -chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK echo 3 > /proc/sys/net/core/rps_default_mask -chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0 +chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0 ip link add name $VETH type veth peer netns $NETNS name $VETH ip link set dev $VETH up ip -n $NETNS link set dev $VETH up -chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 -chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3 +chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0 ip link del dev $VETH ip netns del $NETNS @@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0 ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1 ip link add name $VETH type veth peer netns $NETNS name $VETH -chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1 -chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0 +chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0 exit $ret diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index d6c00efeb664..dbf77513f617 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -313,6 +313,8 @@ kci_test_addrlft() slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." if [ $? -eq 1 ]; then + # troubleshoot the reason for our failure + run_cmd ip addr show dev "$devdummy" check_err 1 end_test "FAIL: preferred_lft addresses remaining" return @@ -323,6 +325,11 @@ kci_test_addrlft() kci_test_promote_secondaries() { + run_cmd ifconfig "$devdummy" + if [ $ret -ne 0 ]; then + end_test "SKIP: ifconfig not installed" + return $ksft_skip + fi promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries) sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1 @@ -519,7 +526,7 @@ kci_test_encap_fou() run_cmd_fail ip -netns "$testns" fou del port 9999 run_cmd ip -netns "$testns" fou del port 7777 if [ $ret -ne 0 ]; then - end_test "FAIL: fou"s + end_test "FAIL: fou" return 1 fi @@ -1201,6 +1208,12 @@ do_test_address_proto() local ret=0 local err + run_cmd_grep 'proto' ip address help + if [ $? -ne 0 ];then + end_test "SKIP: addr proto ${what}: iproute2 too old" + return $ksft_skip + fi + ip address add dev "$devdummy" "$addr3" check_err $? proto=$(address_get_proto "$addr3") diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c index db1aeb8c5d1e..be1080003c61 100644 --- a/tools/testing/selftests/net/socket.c +++ b/tools/testing/selftests/net/socket.c @@ -39,6 +39,7 @@ static int run_tests(void) { char err_string1[ERR_STRING_SZ]; char err_string2[ERR_STRING_SZ]; + const char *msg1, *msg2; int i, err; err = 0; @@ -56,13 +57,13 @@ static int run_tests(void) errno == -s->expect) continue; - strerror_r(-s->expect, err_string1, ERR_STRING_SZ); - strerror_r(errno, err_string2, ERR_STRING_SZ); + msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ); + msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "err (%s) got (%s)\n", s->domain, s->type, s->protocol, - err_string1, err_string2); + msg1, msg2); err = -1; break; @@ -70,12 +71,12 @@ static int run_tests(void) close(fd); if (s->expect < 0) { - strerror_r(errno, err_string1, ERR_STRING_SZ); + msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "success got err (%s)\n", s->domain, s->type, s->protocol, - err_string1); + msg1); err = -1; break; diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c new file mode 100644 index 000000000000..4c39d599dfce --- /dev/null +++ b/tools/testing/selftests/net/tcp_port_share.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2025 Cloudflare, Inc. + +/* Tests for TCP port sharing (bind bucket reuse). */ + +#include <arpa/inet.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <sched.h> +#include <stdlib.h> + +#include "../kselftest_harness.h" + +#define DST_PORT 30000 +#define SRC_PORT 40000 + +struct sockaddr_inet { + union { + struct sockaddr_storage ss; + struct sockaddr_in6 v6; + struct sockaddr_in v4; + struct sockaddr sa; + }; + socklen_t len; + char str[INET6_ADDRSTRLEN + __builtin_strlen("[]:65535") + 1]; +}; + +const int one = 1; + +static int disconnect(int fd) +{ + return connect(fd, &(struct sockaddr){ AF_UNSPEC }, sizeof(struct sockaddr)); +} + +static int getsockname_port(int fd) +{ + struct sockaddr_inet addr = {}; + int err; + + addr.len = sizeof(addr); + err = getsockname(fd, &addr.sa, &addr.len); + if (err) + return -1; + + switch (addr.sa.sa_family) { + case AF_INET: + return ntohs(addr.v4.sin_port); + case AF_INET6: + return ntohs(addr.v6.sin6_port); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +static void make_inet_addr(int af, const char *ip, __u16 port, + struct sockaddr_inet *addr) +{ + const char *fmt = ""; + + memset(addr, 0, sizeof(*addr)); + + switch (af) { + case AF_INET: + addr->len = sizeof(addr->v4); + addr->v4.sin_family = af; + addr->v4.sin_port = htons(port); + inet_pton(af, ip, &addr->v4.sin_addr); + fmt = "%s:%hu"; + break; + case AF_INET6: + addr->len = sizeof(addr->v6); + addr->v6.sin6_family = af; + addr->v6.sin6_port = htons(port); + inet_pton(af, ip, &addr->v6.sin6_addr); + fmt = "[%s]:%hu"; + break; + } + + snprintf(addr->str, sizeof(addr->str), fmt, ip, port); +} + +FIXTURE(tcp_port_share) {}; + +FIXTURE_VARIANT(tcp_port_share) { + int domain; + /* IP to listen on and connect to */ + const char *dst_ip; + /* Primary IP to connect from */ + const char *src1_ip; + /* Secondary IP to connect from */ + const char *src2_ip; + /* IP to bind to in order to block the source port */ + const char *bind_ip; +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv4) { + .domain = AF_INET, + .dst_ip = "127.0.0.1", + .src1_ip = "127.1.1.1", + .src2_ip = "127.2.2.2", + .bind_ip = "127.3.3.3", +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv6) { + .domain = AF_INET6, + .dst_ip = "::1", + .src1_ip = "2001:db8::1", + .src2_ip = "2001:db8::2", + .bind_ip = "2001:db8::3", +}; + +FIXTURE_SETUP(tcp_port_share) +{ + int sc; + + ASSERT_EQ(unshare(CLONE_NEWNET), 0); + ASSERT_EQ(system("ip link set dev lo up"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::1/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::2/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::3/32 nodad"), 0); + + sc = open("/proc/sys/net/ipv4/ip_local_port_range", O_WRONLY); + ASSERT_GE(sc, 0); + ASSERT_GT(dprintf(sc, "%hu %hu\n", SRC_PORT, SRC_PORT), 0); + ASSERT_EQ(close(sc), 0); +} + +FIXTURE_TEARDOWN(tcp_port_share) {} + +/* Verify that an ephemeral port becomes available again after the socket + * bound to it and blocking it from reuse is closed. + */ +TEST_F(tcp_port_share, can_reuse_port_after_bind_and_close) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Bind to <bind_ip>:<SRC_PORT>. Block the port from reuse. */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Try to connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + ASSERT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + /* Unbind from <bind_ip>:<SRC_PORT>. Unblock the port for reuse. */ + ASSERT_EQ(close(pb), 0); + + /* Connect again from <src2_ip>:<SRC_PORT> */ + EXPECT_EQ(connect(c2, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + EXPECT_EQ(getsockname_port(c2), SRC_PORT); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +/* Verify that a socket auto-bound during connect() blocks port reuse after + * disconnect (connect(AF_UNSPEC)) followed by an explicit port bind(). + */ +TEST_F(tcp_port_share, port_block_after_disconnect) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Disconnect the socket and bind it to <bind_ip>:<SRC_PORT> to block the port */ + ASSERT_EQ(disconnect(c1), 0) TH_LOG("disconnect: %m"); + ASSERT_EQ(setsockopt(c1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Trigger port-addr bucket state update with another bind() and close() */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + ASSERT_EQ(close(pb), 0); + + /* Connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket: %m"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + EXPECT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + EXPECT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 1b3f89e2b86e..2a7224fe74f2 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -315,6 +315,29 @@ backup_port() tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" + # Check that packets are forwarded out of vx0 when swp1 is + # administratively down and out of swp1 when it is administratively up + # again. + run_cmd "ip -n $sw1 link set dev swp1 down" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 administratively down" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 up" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 administratively up" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + # Remove vx0 as the backup port of swp1 and check that packets are no # longer forwarded out of vx0 when swp1 does not have a carrier. run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" @@ -322,9 +345,9 @@ backup_port() log_test $? 1 "vx0 not configured as backup port of swp1" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "Forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" @@ -332,9 +355,9 @@ backup_port() log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "No forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" } diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh index 062f957950af..8b414d0edada 100755 --- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -21,7 +21,7 @@ test_set_remote() { RET=0 - ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent check_remotes "fdb append" @@ -74,12 +74,12 @@ test_change_mc_remote() { check_command netstat || return - ip_link_add v1 up type veth peer name v2 - ip_link_set_up v2 + adf_ip_link_add v1 up type veth peer name v2 + adf_ip_link_set_up v2 RET=0 - ip_link_add vx up type vxlan dstport 4789 \ + adf_ip_link_add vx up type vxlan dstport 4789 \ local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000 check_membership "group=224.1.1.1 fail=0" \ diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh index 80bf11fdc046..a4550511830a 100755 --- a/tools/testing/selftests/net/tfo_passive.sh +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -95,7 +95,7 @@ wait res=$(cat $out_file) rm $out_file -if [ $res -eq 0 ]; then +if [ "$res" = "0" ]; then echo "got invalid NAPI ID from passive TFO socket" cleanup_ns exit 1 diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index dd093f9df6f1..e788b84551ca 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -439,6 +439,8 @@ TEST_F(tls, sendfile) EXPECT_GE(filefd, 0); fstat(filefd, &st); EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); + + close(filefd); } TEST_F(tls, send_then_sendfile) @@ -460,6 +462,9 @@ TEST_F(tls, send_then_sendfile) EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); + + free(buf); + close(filefd); } static void chunked_sendfile(struct __test_metadata *_metadata, diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index 282f14760940..dbb34c7e09ce 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no ################################################################################ # -log_test() -{ - local rc=$1 - local expected=$2 - local msg="$3" - - if [ ${rc} -eq ${expected} ]; then - printf "TEST: %-60s [ OK ]\n" "${msg}" - nsuccess=$((nsuccess+1)) - else - ret=1 - nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" - if [ "${PAUSE_ON_FAIL}" = "yes" ]; then - echo - echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 - fi - fi -} - run_cmd() { local ns @@ -203,34 +181,137 @@ setup_traceroute6() run_traceroute6() { - if [ ! -x "$(command -v traceroute6)" ]; then - echo "SKIP: Could not run IPV6 test without traceroute6" - return - fi - setup_traceroute6 + RET=0 + # traceroute6 host-2 from host-1 (expects 2000:102::2) run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" - log_test $? 0 "IPV6 traceroute" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute" cleanup_traceroute6 } ################################################################################ +# traceroute6 with VRF test +# +# Verify that in this scenario +# +# ------------------------ N2 +# | | +# ------ ------ N3 ---- +# | R1 | | R2 |------|H2| +# ------ ------ ---- +# | | +# ------------------------ N1 +# | +# ---- +# |H1| +# ---- +# +# Where H1's default route goes through R1 and R1's default route goes through +# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1. +# The interfaces connecting R2 to the different subnets are membmer in a VRF +# and the intention is to check that traceroute6 does not report the VRF's +# address. +# +# Addresses are assigned as follows: +# +# N1: 2000:101::/64 +# N2: 2000:102::/64 +# N3: 2000:103::/64 +# +# R1's host part of address: 1 +# R2's host part of address: 2 +# H1's host part of address: 3 +# H2's host part of address: 4 +# +# For example: +# the IPv6 address of R1's interface on N2 is 2000:102::1/64 + +cleanup_traceroute6_vrf() +{ + cleanup_all_ns +} + +setup_traceroute6_vrf() +{ + # Start clean + cleanup_traceroute6_vrf + + setup_ns h1 h2 r1 r2 + create_ns "$h1" + create_ns "$h2" + create_ns "$r1" + create_ns "$r2" + + ip -n "$r2" link add name vrf100 up type vrf table 100 + ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100 + + # Setup N3 + connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64 + + ip -n "$r2" link set dev eth3 master vrf100 + + ip -n "$h2" route add default via 2000:103::2 + + # Setup N2 + connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64 + + ip -n "$r1" route add default via 2000:102::2 + + ip -n "$r2" link set dev eth2 master vrf100 + + # Setup N1. host-1 and router-2 connect to a bridge in router-1. + ip -n "$r1" link add name br100 up type bridge + ip -n "$r1" addr add 2000:101::1/64 dev br100 + + connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - - + + ip -n "$h1" route add default via 2000:101::1 + + ip -n "$r1" link set dev eth0 master br100 + + connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - - + + ip -n "$r2" link set dev eth1 master vrf100 + + ip -n "$r1" link set dev eth1 master br100 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1 +} + +run_traceroute6_vrf() +{ + setup_traceroute6_vrf + + RET=0 + + # traceroute6 host-2 from host-1 (expects 2000:102::2) + run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute with VRF" + + cleanup_traceroute6_vrf +} + +################################################################################ # traceroute test # -# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. # -# 1.0.3.1/24 +# 1.0.3.3/24 1.0.3.1/24 # ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- # |H1|--------------------------|R1|--------------------------|H2| # ---- N1 ---- N2 ---- # -# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and -# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary -# address on N1. -# +# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and +# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer +# a source address that is on the same subnet as the destination IP of the ICMP +# error message. cleanup_traceroute() { @@ -250,6 +331,7 @@ setup_traceroute() connect_ns $h1 eth0 1.0.1.3/24 - \ $router eth1 1.0.3.1/24 - + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 ip netns exec $h1 ip route add default via 1.0.1.1 ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 @@ -268,35 +350,107 @@ setup_traceroute() run_traceroute() { - if [ ! -x "$(command -v traceroute)" ]; then - echo "SKIP: Could not run IPV4 test without traceroute" - return - fi - setup_traceroute - # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" - log_test $? 0 "IPV4 traceroute" + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute" cleanup_traceroute } ################################################################################ +# traceroute with VRF test +# +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The +# intention is to check that the kernel does not choose an IP assigned to the +# VRF device, but rather an address from the VRF port (eth1) that received the +# packet that generates the ICMP error message. +# +# 1.0.4.1/24 (vrf100) +# 1.0.3.3/24 1.0.3.1/24 +# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- + +cleanup_traceroute_vrf() +{ + cleanup_all_ns +} + +setup_traceroute_vrf() +{ + # Start clean + cleanup_traceroute_vrf + + setup_ns h1 h2 router + create_ns "$h1" + create_ns "$h2" + create_ns "$router" + + ip -n "$router" link add name vrf100 up type vrf table 100 + ip -n "$router" addr add 1.0.4.1/24 dev vrf100 + + connect_ns "$h1" eth0 1.0.1.3/24 - \ + "$router" eth1 1.0.1.1/24 - + + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 + ip -n "$h1" route add default via 1.0.1.1 + + ip -n "$router" link set dev eth1 master vrf100 + ip -n "$router" addr add 1.0.3.1/24 dev eth1 + ip netns exec "$router" sysctl -qw \ + net.ipv4.icmp_errors_use_inbound_ifaddr=1 + + connect_ns "$h2" eth0 1.0.2.4/24 - \ + "$router" eth2 1.0.2.1/24 - + + ip -n "$h2" route add default via 1.0.2.1 + + ip -n "$router" link set dev eth2 master vrf100 + + # Prime the network + ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1 +} + +run_traceroute_vrf() +{ + setup_traceroute_vrf + + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute with VRF" + + cleanup_traceroute_vrf +} + +################################################################################ # Run tests run_tests() { run_traceroute6 + run_traceroute6_vrf run_traceroute + run_traceroute_vrf } ################################################################################ # main -declare -i nfail=0 -declare -i nsuccess=0 - while getopts :pv o do case $o in @@ -306,7 +460,9 @@ do esac done +require_command traceroute6 +require_command traceroute + run_tests -printf "\nTests passed: %3d\n" ${nsuccess} -printf "Tests failed: %3d\n" ${nfail} +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh index e7cb8c678bde..db481af9b6b3 100755 --- a/tools/testing/selftests/net/vlan_bridge_binding.sh +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -18,29 +18,29 @@ setup_prepare() { local port - ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add br up type bridge vlan_filtering 1 for port in d1 d2 d3; do - ip_link_add $port type veth peer name r$port - ip_link_set_up $port - ip_link_set_up r$port - ip_link_set_master $port br + adf_ip_link_add $port type veth peer name r$port + adf_ip_link_set_up $port + adf_ip_link_set_up r$port + adf_ip_link_set_master $port br done - bridge_vlan_add vid 11 dev br self - bridge_vlan_add vid 11 dev d1 master + adf_bridge_vlan_add vid 11 dev br self + adf_bridge_vlan_add vid 11 dev d1 master - bridge_vlan_add vid 12 dev br self - bridge_vlan_add vid 12 dev d2 master + adf_bridge_vlan_add vid 12 dev br self + adf_bridge_vlan_add vid 12 dev d2 master - bridge_vlan_add vid 13 dev br self - bridge_vlan_add vid 13 dev d1 master - bridge_vlan_add vid 13 dev d2 master + adf_bridge_vlan_add vid 13 dev br self + adf_bridge_vlan_add vid 13 dev d1 master + adf_bridge_vlan_add vid 13 dev d2 master - bridge_vlan_add vid 14 dev br self - bridge_vlan_add vid 14 dev d1 master - bridge_vlan_add vid 14 dev d2 master - bridge_vlan_add vid 14 dev d3 master + adf_bridge_vlan_add vid 14 dev br self + adf_bridge_vlan_add vid 14 dev d1 master + adf_bridge_vlan_add vid 14 dev d2 master + adf_bridge_vlan_add vid 14 dev d3 master } operstate_is() @@ -74,7 +74,7 @@ add_one_vlan() local link=$1; shift local id=$1; shift - ip_link_add $link.$id link $link type vlan id $id "$@" + adf_ip_link_add $link.$id link $link type vlan id $id "$@" } add_vlans() @@ -98,7 +98,7 @@ down_netdevs() local dev for dev in "$@"; do - ip_link_set_down $dev + adf_ip_link_set_down $dev done } @@ -207,13 +207,13 @@ test_binding_toggle_off() do_test_binding_off : "on->off" } -dfr_set_binding_on() +adf_set_binding_on() { set_vlans type vlan bridge_binding on defer set_vlans type vlan bridge_binding off } -dfr_set_binding_off() +adf_set_binding_off() { set_vlans type vlan bridge_binding off defer set_vlans type vlan bridge_binding on @@ -223,14 +223,14 @@ test_binding_toggle_on_when_lower_down() { add_vlans bridge_binding off set_vlans up - do_test_binding_on dfr_set_binding_on "off->on when lower down" + do_test_binding_on adf_set_binding_on "off->on when lower down" } test_binding_toggle_off_when_lower_down() { add_vlans bridge_binding on set_vlans up - do_test_binding_off dfr_set_binding_off "on->off when lower down" + do_test_binding_off adf_set_binding_off "on->off when lower down" } test_binding_toggle_on_when_upper_down() diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index e907c2751956..793a2fc33d9f 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -5,10 +5,11 @@ # Inputs: # # YNL_GENS: families we need in the selftests -# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet) +# YNL_GEN_PROGS: TEST_GEN_PROGS which need YNL # YNL_GEN_FILES: TEST_GEN_FILES which need YNL -YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) +YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) \ + $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_PROGS)) YNL_SPECS := \ $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS)) diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c index e03fe1b9bba2..b3a72f0ac522 100644 --- a/tools/testing/selftests/perf_events/watermark_signal.c +++ b/tools/testing/selftests/perf_events/watermark_signal.c @@ -17,8 +17,6 @@ #include "../kselftest_harness.h" -#define __maybe_unused __attribute__((__unused__)) - static int sigio_count; static void handle_sigio(int signum __maybe_unused, diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 6b78a8382d40..9c9735570abf 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -7,6 +7,7 @@ /proc-loadavg-001 /proc-maps-race /proc-multiple-procfs +/proc-net-dev-lseek /proc-empty-vm /proc-pid-vm /proc-self-map-files-001 diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index be3013515aae..a7de2bb6d8be 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -10,6 +10,7 @@ TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 TEST_GEN_PROGS += proc-maps-race +TEST_GEN_PROGS += proc-net-dev-lseek TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 94bba4553130..a546475db550 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -32,6 +32,8 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <linux/fs.h> +#include <sys/ioctl.h> #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> @@ -317,6 +319,25 @@ static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, strcmp(restored_first_line->text, self->first_line.text) == 0; } +static bool query_addr_at(int maps_fd, void *addr, + unsigned long *vma_start, unsigned long *vma_end) +{ + struct procmap_query q; + + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + /* Find the VMA at the split address */ + q.query_addr = (unsigned long long)addr; + q.query_flags = 0; + if (ioctl(maps_fd, PROCMAP_QUERY, &q)) + return false; + + *vma_start = q.vma_start; + *vma_end = q.vma_end; + + return true; +} + static inline bool split_vma(FIXTURE_DATA(proc_maps_race) *self) { return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot | PROT_EXEC, @@ -559,6 +580,8 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) do { bool last_line_changed; bool first_line_changed; + unsigned long vma_start; + unsigned long vma_end; ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); @@ -595,6 +618,19 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0; ASSERT_EQ(last_line_changed, first_line_changed); + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size, + &vma_start, &vma_end)); + /* + * The vma at the split address can be either the same as + * original one (if read before the split) or the same as the + * first line in the second page (if read after the split). + */ + ASSERT_TRUE((vma_start == self->last_line.start_addr && + vma_end == self->last_line.end_addr) || + (vma_start == split_first_line.start_addr && + vma_end == split_first_line.end_addr)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); @@ -636,6 +672,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); start_test_loop(&start_ts, self->verbose); do { + unsigned long vma_start; + unsigned long vma_end; + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); /* Check if we read vmas after shrinking it */ @@ -662,6 +701,16 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) "Expand result invalid", self)); } + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr, &vma_start, &vma_end)); + /* + * The vma should stay at the same address and have either the + * original size of 3 pages or 1 page if read after shrinking. + */ + ASSERT_TRUE(vma_start == self->last_line.start_addr && + (vma_end - vma_start == self->page_size * 3 || + vma_end - vma_start == self->page_size)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); @@ -703,6 +752,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); start_test_loop(&start_ts, self->verbose); do { + unsigned long vma_start; + unsigned long vma_end; + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); /* Check if we read vmas after remapping it */ @@ -729,6 +781,19 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) "Remap restore result invalid", self)); } + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size, + &vma_start, &vma_end)); + /* + * The vma should either stay at the same address and have the + * original size of 3 pages or we should find the remapped vma + * at the remap destination address with size of 1 page. + */ + ASSERT_TRUE((vma_start == self->last_line.start_addr && + vma_end - vma_start == self->page_size * 3) || + (vma_start == self->last_line.start_addr + self->page_size && + vma_end - vma_start == self->page_size)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); diff --git a/tools/testing/selftests/proc/proc-net-dev-lseek.c b/tools/testing/selftests/proc/proc-net-dev-lseek.c new file mode 100644 index 000000000000..742a3e804451 --- /dev/null +++ b/tools/testing/selftests/proc/proc-net-dev-lseek.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2025 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#undef _GNU_SOURCE +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <sched.h> +/* + * Test that lseek("/proc/net/dev/", 0, SEEK_SET) + * a) works, + * b) does what you think it does. + */ +int main(void) +{ + /* /proc/net/dev output is deterministic in fresh netns only. */ + if (unshare(CLONE_NEWNET) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + + const int fd = open("/proc/net/dev", O_RDONLY); + assert(fd >= 0); + + char buf1[4096]; + const ssize_t rv1 = read(fd, buf1, sizeof(buf1)); + /* + * Not "<=", this file can't be empty: + * there is header, "lo" interface with some zeroes. + */ + assert(0 < rv1); + assert(rv1 <= sizeof(buf1)); + + /* Believe it or not, this line broke one day. */ + assert(lseek(fd, 0, SEEK_SET) == 0); + + char buf2[4096]; + const ssize_t rv2 = read(fd, buf2, sizeof(buf2)); + /* Not "<=", see above. */ + assert(0 < rv2); + assert(rv2 <= sizeof(buf2)); + + /* Test that lseek rewinds to the beginning of the file. */ + assert(rv1 == rv2); + assert(memcmp(buf1, buf2, rv1) == 0); + + /* Contents of the file is not validated: this test is about lseek(). */ + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index d04685771952..978cbcb3eb11 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -47,6 +47,10 @@ #include <sys/resource.h> #include <linux/fs.h> +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + #include "../kselftest.h" static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) @@ -218,12 +222,12 @@ static int make_exe(const uint8_t *payload, size_t len) * 2: vsyscall VMA is r-xp vsyscall=emulate */ static volatile int g_vsyscall; -static const char *str_vsyscall; +static const char *str_vsyscall __maybe_unused; -static const char str_vsyscall_0[] = ""; -static const char str_vsyscall_1[] = +static const char str_vsyscall_0[] __maybe_unused = ""; +static const char str_vsyscall_1[] __maybe_unused = "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; -static const char str_vsyscall_2[] = +static const char str_vsyscall_2[] __maybe_unused = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; #ifdef __x86_64__ diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index fd1ffaa5a135..3c1e5d3f8805 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -39,6 +39,22 @@ do fi done +# Uses global variables startsecs, startns, endsecs, endns, and limit. +# Exit code is success for time not yet elapsed and failure otherwise. +function timecheck { + local done=`awk -v limit=$limit \ + -v startsecs=$startsecs \ + -v startns=$startns \ + -v endsecs=$endsecs \ + -v endns=$endns < /dev/null ' + BEGIN { + delta = (endsecs - startsecs) * 1000 * 1000; + delta += int((endns - startns) / 1000); + print delta >= limit; + }'` + return $done +} + while : do # Check for done. @@ -85,15 +101,20 @@ do n=$(($n+1)) sleep .$sleeptime - # Spin a random duration + # Spin a random duration, but with rather coarse granularity. limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN { srand(n + me + systime()); printf("%06d", int(rand() * spinmax)); }' < /dev/null` n=$(($n+1)) - for i in {1..$limit} + startsecs=`date +%s` + startns=`date +%N` + endsecs=$startns + endns=$endns + while timecheck do - echo > /dev/null + endsecs=`date +%s` + endns=`date +%N` done done diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 611bc03a8dc7..a33ba109ef0b 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -94,6 +94,7 @@ usage () { echo " --do-kvfree / --do-no-kvfree / --no-kvfree" echo " --do-locktorture / --do-no-locktorture / --no-locktorture" echo " --do-none" + echo " --do-normal / --do-no-normal / --no-normal" echo " --do-rcuscale / --do-no-rcuscale / --no-rcuscale" echo " --do-rcutasksflavors / --do-no-rcutasksflavors / --no-rcutasksflavors" echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture" diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 67d544aaa9a3..06c840e81c8b 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -8,6 +8,7 @@ * exception when executed in all modes. */ #include <endian.h> +#include <asm/fence.h> #if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN) #define RSEQ_SIG 0xf1401073 /* csrr mhartid, x0 */ @@ -24,8 +25,6 @@ #define REG_L __REG_SEL("ld ", "lw ") #define REG_S __REG_SEL("sd ", "sw ") -#define RISCV_FENCE(p, s) \ - __asm__ __volatile__ ("fence " #p "," #s : : : "memory") #define rseq_smp_mb() RISCV_FENCE(rw, rw) #define rseq_smp_rmb() RISCV_FENCE(r, r) #define rseq_smp_wmb() RISCV_FENCE(w, w) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 5596f4df0e9f..b2cc6ea74450 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -879,7 +879,7 @@ "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1", "expExitCode": "0", "verifyCmd": "$TC actions ls action police", - "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200", + "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu (4Gb|4096Mb) pkts_rate 1000 pkts_burst 200", "matchCount": "1", "teardown": [ "$TC actions flush action police" diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index ba58589a1145..ca2bd03154e4 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -144,6 +144,8 @@ int main(int argc, char **argv) ret = sscanf(index_str, "%d", &index); if (ret < 0) break; + + index &= 0x0f; if (index > WORKLOAD_TYPE_MAX_INDEX) printf("Invalid workload type index\n"); else diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 5d7f4ecfb816..770269efe42a 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -20,6 +20,7 @@ TEST_PROGS += test_generic_09.sh TEST_PROGS += test_generic_10.sh TEST_PROGS += test_generic_11.sh TEST_PROGS += test_generic_12.sh +TEST_PROGS += test_generic_13.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index b71faba86c3b..6b8123c12a7a 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1384,21 +1384,23 @@ static int cmd_dev_list(struct dev_ctx *ctx) static int cmd_dev_get_features(void) { #define const_ilog2(x) (63 - __builtin_clzll(x)) +#define FEAT_NAME(f) [const_ilog2(f)] = #f static const char *feat_map[] = { - [const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY", - [const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK", - [const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA", - [const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY", - [const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE", - [const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV", - [const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE", - [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY", - [const_ilog2(UBLK_F_ZONED)] = "ZONED", - [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO", - [const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE", - [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG", - [const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE", - [const_ilog2(UBLK_F_PER_IO_DAEMON)] = "PER_IO_DAEMON", + FEAT_NAME(UBLK_F_SUPPORT_ZERO_COPY), + FEAT_NAME(UBLK_F_URING_CMD_COMP_IN_TASK), + FEAT_NAME(UBLK_F_NEED_GET_DATA), + FEAT_NAME(UBLK_F_USER_RECOVERY), + FEAT_NAME(UBLK_F_USER_RECOVERY_REISSUE), + FEAT_NAME(UBLK_F_UNPRIVILEGED_DEV), + FEAT_NAME(UBLK_F_CMD_IOCTL_ENCODE), + FEAT_NAME(UBLK_F_USER_COPY), + FEAT_NAME(UBLK_F_ZONED), + FEAT_NAME(UBLK_F_USER_RECOVERY_FAIL_IO), + FEAT_NAME(UBLK_F_UPDATE_SIZE), + FEAT_NAME(UBLK_F_AUTO_BUF_REG), + FEAT_NAME(UBLK_F_QUIESCE), + FEAT_NAME(UBLK_F_PER_IO_DAEMON), + FEAT_NAME(UBLK_F_BUF_REG_OFF_DAEMON), }; struct ublk_dev *dev; __u64 features = 0; @@ -1425,7 +1427,7 @@ static int cmd_dev_get_features(void) feat = feat_map[i]; else feat = "unknown"; - printf("\t%-20s: 0x%llx\n", feat, 1ULL << i); + printf("0x%-16llx: %s\n", 1ULL << i, feat); } } diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh index 9227a208ba53..21a31cd5491a 100755 --- a/tools/testing/selftests/ublk/test_generic_01.sh +++ b/tools/testing/selftests/ublk/test_generic_01.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "sequential io order" dev_id=$(_add_ublk_dev -t null) diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh index 3e80121e3bf5..12920768b1a0 100755 --- a/tools/testing/selftests/ublk/test_generic_02.sh +++ b/tools/testing/selftests/ublk/test_generic_02.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "sequential io order for MQ" dev_id=$(_add_ublk_dev -t null -q 2) diff --git a/tools/testing/selftests/ublk/test_generic_12.sh b/tools/testing/selftests/ublk/test_generic_12.sh index 7abbb00d251d..b4046201b4d9 100755 --- a/tools/testing/selftests/ublk/test_generic_12.sh +++ b/tools/testing/selftests/ublk/test_generic_12.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "do imbalanced load, it should be balanced over I/O threads" NTHREADS=6 diff --git a/tools/testing/selftests/ublk/test_generic_13.sh b/tools/testing/selftests/ublk/test_generic_13.sh new file mode 100755 index 000000000000..b7aa90b1cb74 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_13.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_13" +ERR_CODE=0 + +_prep_test "null" "check that feature list is complete" + +if ${UBLK_PROG} features | grep -q unknown; then + echo "# unknown feature detected!" + echo "# did you add a feature and forget to update feat_map in kublk?" + echo "# this failure is expected if running an older test suite against" + echo "# a newer kernel with new features added" + ERR_CODE=255 +fi + +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_01.sh b/tools/testing/selftests/ublk/test_null_01.sh index a34203f72668..c2cb8f7a09fe 100755 --- a/tools/testing/selftests/ublk/test_null_01.sh +++ b/tools/testing/selftests/ublk/test_null_01.sh @@ -6,6 +6,10 @@ TID="null_01" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "basic IO test" dev_id=$(_add_ublk_dev -t null) diff --git a/tools/testing/selftests/ublk/test_null_02.sh b/tools/testing/selftests/ublk/test_null_02.sh index 5633ca876655..8accd35beb55 100755 --- a/tools/testing/selftests/ublk/test_null_02.sh +++ b/tools/testing/selftests/ublk/test_null_02.sh @@ -6,6 +6,10 @@ TID="null_02" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "basic IO test with zero copy" dev_id=$(_add_ublk_dev -t null -z) diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index 566cfd90d192..274295061042 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -5,6 +5,10 @@ TID="stress_05" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + run_io_and_remove() { local size=$1 diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h index 36545d1567f1..a852e0b7153e 100644 --- a/tools/testing/selftests/ublk/utils.h +++ b/tools/testing/selftests/ublk/utils.h @@ -2,8 +2,6 @@ #ifndef KUBLK_UTILS_H #define KUBLK_UTILS_H -#define __maybe_unused __attribute__((unused)) - #ifndef min #define min(a, b) ((a) < (b) ? (a) : (b)) #endif diff --git a/tools/testing/selftests/vfio/.gitignore b/tools/testing/selftests/vfio/.gitignore new file mode 100644 index 000000000000..7fadc19d3bca --- /dev/null +++ b/tools/testing/selftests/vfio/.gitignore @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +* +!/**/ +!*.c +!*.h +!*.S +!*.sh +!*.mk +!.gitignore +!Makefile diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile new file mode 100644 index 000000000000..324ba0175a33 --- /dev/null +++ b/tools/testing/selftests/vfio/Makefile @@ -0,0 +1,21 @@ +CFLAGS = $(KHDR_INCLUDES) +TEST_GEN_PROGS += vfio_dma_mapping_test +TEST_GEN_PROGS += vfio_iommufd_setup_test +TEST_GEN_PROGS += vfio_pci_device_test +TEST_GEN_PROGS += vfio_pci_driver_test +TEST_PROGS_EXTENDED := run.sh +include ../lib.mk +include lib/libvfio.mk + +CFLAGS += -I$(top_srcdir)/tools/include +CFLAGS += -MD +CFLAGS += $(EXTRA_CFLAGS) + +$(TEST_GEN_PROGS): %: %.o $(LIBVFIO_O) + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< $(LIBVFIO_O) $(LDLIBS) -o $@ + +TEST_GEN_PROGS_O = $(patsubst %, %.o, $(TEST_GEN_PROGS)) +TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O)) +-include $(TEST_DEP_FILES) + +EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES) diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c new file mode 100644 index 000000000000..0ca2cbc2a316 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdint.h> +#include <unistd.h> + +#include <linux/bits.h> +#include <linux/errno.h> +#include <linux/idxd.h> +#include <linux/io.h> +#include <linux/pci_ids.h> +#include <linux/sizes.h> + +#include <vfio_util.h> + +#include "registers.h" + +/* Vectors 1+ are available for work queue completion interrupts. */ +#define MSIX_VECTOR 1 + +struct dsa_state { + /* Descriptors for copy and batch operations. */ + struct dsa_hw_desc batch[32]; + struct dsa_hw_desc copy[1024]; + + /* Completion records for copy and batch operations. */ + struct dsa_completion_record copy_completion; + struct dsa_completion_record batch_completion; + + /* Cached device registers (and derived data) for easy access */ + union gen_cap_reg gen_cap; + union wq_cap_reg wq_cap; + union group_cap_reg group_cap; + union engine_cap_reg engine_cap; + union offsets_reg table_offsets; + void *wqcfg_table; + void *grpcfg_table; + u64 max_batches; + u64 max_copies_per_batch; + + /* The number of ongoing memcpy operations. */ + u64 memcpy_count; + + /* Buffers used by dsa_send_msi() to generate an interrupt */ + u64 send_msi_src; + u64 send_msi_dst; +}; + +static inline struct dsa_state *to_dsa_state(struct vfio_pci_device *device) +{ + return device->driver.region.vaddr; +} + +static bool dsa_int_handle_request_required(struct vfio_pci_device *device) +{ + void *bar0 = device->bars[0].vaddr; + union gen_cap_reg gen_cap; + u32 cmd_cap; + + gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET); + if (!gen_cap.cmd_cap) + return false; + + cmd_cap = readl(bar0 + IDXD_CMDCAP_OFFSET); + return (cmd_cap >> IDXD_CMD_REQUEST_INT_HANDLE) & 1; +} + +static int dsa_probe(struct vfio_pci_device *device) +{ + if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_DSA_SPR0)) + return -EINVAL; + + if (dsa_int_handle_request_required(device)) { + printf("Device requires requesting interrupt handles\n"); + return -EINVAL; + } + + return 0; +} + +static void dsa_check_sw_err(struct vfio_pci_device *device) +{ + void *reg = device->bars[0].vaddr + IDXD_SWERR_OFFSET; + union sw_err_reg err = {}; + int i; + + for (i = 0; i < ARRAY_SIZE(err.bits); i++) { + err.bits[i] = readq(reg + offsetof(union sw_err_reg, bits[i])); + + /* No errors */ + if (i == 0 && !err.valid) + return; + } + + fprintf(stderr, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", + err.bits[0], err.bits[1], err.bits[2], err.bits[3]); + + fprintf(stderr, " valid: 0x%x\n", err.valid); + fprintf(stderr, " overflow: 0x%x\n", err.overflow); + fprintf(stderr, " desc_valid: 0x%x\n", err.desc_valid); + fprintf(stderr, " wq_idx_valid: 0x%x\n", err.wq_idx_valid); + fprintf(stderr, " batch: 0x%x\n", err.batch); + fprintf(stderr, " fault_rw: 0x%x\n", err.fault_rw); + fprintf(stderr, " priv: 0x%x\n", err.priv); + fprintf(stderr, " error: 0x%x\n", err.error); + fprintf(stderr, " wq_idx: 0x%x\n", err.wq_idx); + fprintf(stderr, " operation: 0x%x\n", err.operation); + fprintf(stderr, " pasid: 0x%x\n", err.pasid); + fprintf(stderr, " batch_idx: 0x%x\n", err.batch_idx); + fprintf(stderr, " invalid_flags: 0x%x\n", err.invalid_flags); + fprintf(stderr, " fault_addr: 0x%lx\n", err.fault_addr); + + VFIO_FAIL("Software Error Detected!\n"); +} + +static void dsa_command(struct vfio_pci_device *device, u32 cmd) +{ + union idxd_command_reg cmd_reg = { .cmd = cmd }; + u32 sleep_ms = 1, attempts = 5000 / sleep_ms; + void *bar0 = device->bars[0].vaddr; + u32 status; + u8 err; + + writel(cmd_reg.bits, bar0 + IDXD_CMD_OFFSET); + + for (;;) { + dsa_check_sw_err(device); + + status = readl(bar0 + IDXD_CMDSTS_OFFSET); + if (!(status & IDXD_CMDSTS_ACTIVE)) + break; + + VFIO_ASSERT_GT(--attempts, 0); + usleep(sleep_ms * 1000); + } + + err = status & IDXD_CMDSTS_ERR_MASK; + VFIO_ASSERT_EQ(err, 0, "Error issuing command 0x%x: 0x%x\n", cmd, err); +} + +static void dsa_wq_init(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + union wq_cap_reg wq_cap = dsa->wq_cap; + union wqcfg wqcfg; + u64 wqcfg_size; + int i; + + VFIO_ASSERT_GT((u32)wq_cap.num_wqs, 0); + + wqcfg = (union wqcfg) { + .wq_size = wq_cap.total_wq_size, + .mode = 1, + .priority = 1, + /* + * Disable Address Translation Service (if enabled) so that VFIO + * selftests using this driver can generate I/O page faults. + */ + .wq_ats_disable = wq_cap.wq_ats_support, + .max_xfer_shift = dsa->gen_cap.max_xfer_shift, + .max_batch_shift = dsa->gen_cap.max_batch_shift, + .op_config[0] = BIT(DSA_OPCODE_MEMMOVE) | BIT(DSA_OPCODE_BATCH), + }; + + wqcfg_size = 1UL << (wq_cap.wqcfg_size + IDXD_WQCFG_MIN); + + for (i = 0; i < wqcfg_size / sizeof(wqcfg.bits[0]); i++) + writel(wqcfg.bits[i], dsa->wqcfg_table + offsetof(union wqcfg, bits[i])); +} + +static void dsa_group_init(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + union group_cap_reg group_cap = dsa->group_cap; + union engine_cap_reg engine_cap = dsa->engine_cap; + + VFIO_ASSERT_GT((u32)group_cap.num_groups, 0); + VFIO_ASSERT_GT((u32)engine_cap.num_engines, 0); + + /* Assign work queue 0 and engine 0 to group 0 */ + writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, wqs[0])); + writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, engines)); +} + +static void dsa_register_cache_init(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + void *bar0 = device->bars[0].vaddr; + + dsa->gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET); + dsa->wq_cap.bits = readq(bar0 + IDXD_WQCAP_OFFSET); + dsa->group_cap.bits = readq(bar0 + IDXD_GRPCAP_OFFSET); + dsa->engine_cap.bits = readq(bar0 + IDXD_ENGCAP_OFFSET); + + dsa->table_offsets.bits[0] = readq(bar0 + IDXD_TABLE_OFFSET); + dsa->table_offsets.bits[1] = readq(bar0 + IDXD_TABLE_OFFSET + 8); + + dsa->wqcfg_table = bar0 + dsa->table_offsets.wqcfg * IDXD_TABLE_MULT; + dsa->grpcfg_table = bar0 + dsa->table_offsets.grpcfg * IDXD_TABLE_MULT; + + dsa->max_batches = 1U << (dsa->wq_cap.total_wq_size + IDXD_WQCFG_MIN); + dsa->max_batches = min(dsa->max_batches, ARRAY_SIZE(dsa->batch)); + + dsa->max_copies_per_batch = 1UL << dsa->gen_cap.max_batch_shift; + dsa->max_copies_per_batch = min(dsa->max_copies_per_batch, ARRAY_SIZE(dsa->copy)); +} + +static void dsa_init(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + + VFIO_ASSERT_GE(device->driver.region.size, sizeof(*dsa)); + + vfio_pci_config_writew(device, PCI_COMMAND, + PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | + PCI_COMMAND_INTX_DISABLE); + + dsa_command(device, IDXD_CMD_RESET_DEVICE); + + dsa_register_cache_init(device); + dsa_wq_init(device); + dsa_group_init(device); + + dsa_command(device, IDXD_CMD_ENABLE_DEVICE); + dsa_command(device, IDXD_CMD_ENABLE_WQ); + + vfio_pci_msix_enable(device, MSIX_VECTOR, 1); + + device->driver.max_memcpy_count = + dsa->max_batches * dsa->max_copies_per_batch; + device->driver.max_memcpy_size = 1UL << dsa->gen_cap.max_xfer_shift; + device->driver.msi = MSIX_VECTOR; +} + +static void dsa_remove(struct vfio_pci_device *device) +{ + dsa_command(device, IDXD_CMD_RESET_DEVICE); + vfio_pci_msix_disable(device); +} + +static int dsa_completion_wait(struct vfio_pci_device *device, + struct dsa_completion_record *completion) +{ + u8 status; + + for (;;) { + dsa_check_sw_err(device); + + status = READ_ONCE(completion->status); + if (status) + break; + + usleep(1000); + } + + if (status == DSA_COMP_SUCCESS) + return 0; + + printf("Error detected during memcpy operation: 0x%x\n", status); + return -1; +} + +static void dsa_copy_desc_init(struct vfio_pci_device *device, + struct dsa_hw_desc *desc, + iova_t src, iova_t dst, u64 size, + bool interrupt) +{ + struct dsa_state *dsa = to_dsa_state(device); + u16 flags; + + flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR; + + if (interrupt) + flags |= IDXD_OP_FLAG_RCI; + + *desc = (struct dsa_hw_desc) { + .opcode = DSA_OPCODE_MEMMOVE, + .flags = flags, + .priv = 1, + .src_addr = src, + .dst_addr = dst, + .xfer_size = size, + .completion_addr = to_iova(device, &dsa->copy_completion), + .int_handle = interrupt ? MSIX_VECTOR : 0, + }; +} + +static void dsa_batch_desc_init(struct vfio_pci_device *device, + struct dsa_hw_desc *desc, + u64 count) +{ + struct dsa_state *dsa = to_dsa_state(device); + + *desc = (struct dsa_hw_desc) { + .opcode = DSA_OPCODE_BATCH, + .flags = IDXD_OP_FLAG_CRAV, + .priv = 1, + .completion_addr = to_iova(device, &dsa->batch_completion), + .desc_list_addr = to_iova(device, &dsa->copy[0]), + .desc_count = count, + }; +} + +static void dsa_desc_write(struct vfio_pci_device *device, struct dsa_hw_desc *desc) +{ + /* Write the contents (not address) of the 64-byte descriptor to the device. */ + iosubmit_cmds512(device->bars[2].vaddr, desc, 1); +} + +static void dsa_memcpy_one(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, bool interrupt) +{ + struct dsa_state *dsa = to_dsa_state(device); + + memset(&dsa->copy_completion, 0, sizeof(dsa->copy_completion)); + + dsa_copy_desc_init(device, &dsa->copy[0], src, dst, size, interrupt); + dsa_desc_write(device, &dsa->copy[0]); +} + +static void dsa_memcpy_batch(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, u64 count) +{ + struct dsa_state *dsa = to_dsa_state(device); + int i; + + memset(&dsa->batch_completion, 0, sizeof(dsa->batch_completion)); + + for (i = 0; i < ARRAY_SIZE(dsa->copy); i++) { + struct dsa_hw_desc *copy_desc = &dsa->copy[i]; + + dsa_copy_desc_init(device, copy_desc, src, dst, size, false); + + /* Don't request completions for individual copies. */ + copy_desc->flags &= ~IDXD_OP_FLAG_RCR; + } + + for (i = 0; i < ARRAY_SIZE(dsa->batch) && count; i++) { + struct dsa_hw_desc *batch_desc = &dsa->batch[i]; + int nr_copies; + + nr_copies = min(count, dsa->max_copies_per_batch); + count -= nr_copies; + + /* + * Batches must have at least 2 copies, so handle the case where + * there is exactly 1 copy left by doing one less copy in this + * batch and then 2 in the next. + */ + if (count == 1) { + nr_copies--; + count++; + } + + dsa_batch_desc_init(device, batch_desc, nr_copies); + + /* Request a completion for the last batch. */ + if (!count) + batch_desc->flags |= IDXD_OP_FLAG_RCR; + + dsa_desc_write(device, batch_desc); + } + + VFIO_ASSERT_EQ(count, 0, "Failed to start %lu copies.\n", count); +} + +static void dsa_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, u64 count) +{ + struct dsa_state *dsa = to_dsa_state(device); + + /* DSA devices require at least 2 copies per batch. */ + if (count == 1) + dsa_memcpy_one(device, src, dst, size, false); + else + dsa_memcpy_batch(device, src, dst, size, count); + + dsa->memcpy_count = count; +} + +static int dsa_memcpy_wait(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + int r; + + if (dsa->memcpy_count == 1) + r = dsa_completion_wait(device, &dsa->copy_completion); + else + r = dsa_completion_wait(device, &dsa->batch_completion); + + dsa->memcpy_count = 0; + + return r; +} + +static void dsa_send_msi(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + + dsa_memcpy_one(device, + to_iova(device, &dsa->send_msi_src), + to_iova(device, &dsa->send_msi_dst), + sizeof(dsa->send_msi_src), true); + + VFIO_ASSERT_EQ(dsa_completion_wait(device, &dsa->copy_completion), 0); +} + +const struct vfio_pci_driver_ops dsa_ops = { + .name = "dsa", + .probe = dsa_probe, + .init = dsa_init, + .remove = dsa_remove, + .memcpy_start = dsa_memcpy_start, + .memcpy_wait = dsa_memcpy_wait, + .send_msi = dsa_send_msi, +}; diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h new file mode 120000 index 000000000000..bde657c3c2af --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h @@ -0,0 +1 @@ +../../../../../../../drivers/dma/idxd/registers.h
\ No newline at end of file diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h new file mode 120000 index 000000000000..8ab52ddd4458 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h @@ -0,0 +1 @@ +../../../../../../../drivers/dma/ioat/hw.h
\ No newline at end of file diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c new file mode 100644 index 000000000000..c3b91d9b1f59 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdint.h> +#include <unistd.h> + +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/pci_ids.h> +#include <linux/sizes.h> + +#include <vfio_util.h> + +#include "hw.h" +#include "registers.h" + +#define IOAT_DMACOUNT_MAX UINT16_MAX + +struct ioat_state { + /* Single descriptor used to issue DMA memcpy operations */ + struct ioat_dma_descriptor desc; + + /* Copy buffers used by ioat_send_msi() to generate an interrupt. */ + u64 send_msi_src; + u64 send_msi_dst; +}; + +static inline struct ioat_state *to_ioat_state(struct vfio_pci_device *device) +{ + return device->driver.region.vaddr; +} + +static inline void *ioat_channel_registers(struct vfio_pci_device *device) +{ + return device->bars[0].vaddr + IOAT_CHANNEL_MMIO_SIZE; +} + +static int ioat_probe(struct vfio_pci_device *device) +{ + u8 version; + int r; + + if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_IOAT_SKX)) + return -EINVAL; + + VFIO_ASSERT_NOT_NULL(device->bars[0].vaddr); + + version = readb(device->bars[0].vaddr + IOAT_VER_OFFSET); + switch (version) { + case IOAT_VER_3_2: + case IOAT_VER_3_3: + r = 0; + break; + default: + printf("ioat: Unsupported version: 0x%x\n", version); + r = -EINVAL; + } + return r; +} + +static u64 ioat_channel_status(void *bar) +{ + return readq(bar + IOAT_CHANSTS_OFFSET) & IOAT_CHANSTS_STATUS; +} + +static void ioat_clear_errors(struct vfio_pci_device *device) +{ + void *registers = ioat_channel_registers(device); + u32 errors; + + errors = vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET); + vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors); + + errors = vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET); + vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors); + + errors = readl(registers + IOAT_CHANERR_OFFSET); + writel(errors, registers + IOAT_CHANERR_OFFSET); +} + +static void ioat_reset(struct vfio_pci_device *device) +{ + void *registers = ioat_channel_registers(device); + u32 sleep_ms = 1, attempts = 5000 / sleep_ms; + u8 chancmd; + + ioat_clear_errors(device); + + writeb(IOAT_CHANCMD_RESET, registers + IOAT2_CHANCMD_OFFSET); + + for (;;) { + chancmd = readb(registers + IOAT2_CHANCMD_OFFSET); + if (!(chancmd & IOAT_CHANCMD_RESET)) + break; + + VFIO_ASSERT_GT(--attempts, 0); + usleep(sleep_ms * 1000); + } + + VFIO_ASSERT_EQ(ioat_channel_status(registers), IOAT_CHANSTS_HALTED); +} + +static void ioat_init(struct vfio_pci_device *device) +{ + struct ioat_state *ioat = to_ioat_state(device); + u8 intrctrl; + + VFIO_ASSERT_GE(device->driver.region.size, sizeof(*ioat)); + + vfio_pci_config_writew(device, PCI_COMMAND, + PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | + PCI_COMMAND_INTX_DISABLE); + + ioat_reset(device); + + /* Enable the use of MXI-x interrupts for channel interrupts. */ + intrctrl = IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + writeb(intrctrl, device->bars[0].vaddr + IOAT_INTRCTRL_OFFSET); + + vfio_pci_msix_enable(device, 0, device->msix_info.count); + + device->driver.msi = 0; + device->driver.max_memcpy_size = + 1UL << readb(device->bars[0].vaddr + IOAT_XFERCAP_OFFSET); + device->driver.max_memcpy_count = IOAT_DMACOUNT_MAX; +} + +static void ioat_remove(struct vfio_pci_device *device) +{ + ioat_reset(device); + vfio_pci_msix_disable(device); +} + +static void ioat_handle_error(struct vfio_pci_device *device) +{ + void *registers = ioat_channel_registers(device); + + printf("Error detected during memcpy operation!\n" + " CHANERR: 0x%x\n" + " CHANERR_INT: 0x%x\n" + " DMAUNCERRSTS: 0x%x\n", + readl(registers + IOAT_CHANERR_OFFSET), + vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET), + vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET)); + + ioat_reset(device); +} + +static int ioat_memcpy_wait(struct vfio_pci_device *device) +{ + void *registers = ioat_channel_registers(device); + u64 status; + int r = 0; + + /* Wait until all operations complete. */ + for (;;) { + status = ioat_channel_status(registers); + if (status == IOAT_CHANSTS_DONE) + break; + + if (status == IOAT_CHANSTS_HALTED) { + ioat_handle_error(device); + return -1; + } + } + + /* Put the channel into the SUSPENDED state. */ + writeb(IOAT_CHANCMD_SUSPEND, registers + IOAT2_CHANCMD_OFFSET); + for (;;) { + status = ioat_channel_status(registers); + if (status == IOAT_CHANSTS_SUSPENDED) + break; + } + + return r; +} + +static void __ioat_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, + u16 count, bool interrupt) +{ + void *registers = ioat_channel_registers(device); + struct ioat_state *ioat = to_ioat_state(device); + u64 desc_iova; + u16 chanctrl; + + desc_iova = to_iova(device, &ioat->desc); + ioat->desc = (struct ioat_dma_descriptor) { + .ctl_f.op = IOAT_OP_COPY, + .ctl_f.int_en = interrupt, + .src_addr = src, + .dst_addr = dst, + .size = size, + .next = desc_iova, + }; + + /* Tell the device the address of the descriptor. */ + writeq(desc_iova, registers + IOAT2_CHAINADDR_OFFSET); + + /* (Re)Enable the channel interrupt and abort on any errors */ + chanctrl = IOAT_CHANCTRL_INT_REARM | IOAT_CHANCTRL_ANY_ERR_ABORT_EN; + writew(chanctrl, registers + IOAT_CHANCTRL_OFFSET); + + /* Kick off @count DMA copy operation(s). */ + writew(count, registers + IOAT_CHAN_DMACOUNT_OFFSET); +} + +static void ioat_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, + u64 count) +{ + __ioat_memcpy_start(device, src, dst, size, count, false); +} + +static void ioat_send_msi(struct vfio_pci_device *device) +{ + struct ioat_state *ioat = to_ioat_state(device); + + __ioat_memcpy_start(device, + to_iova(device, &ioat->send_msi_src), + to_iova(device, &ioat->send_msi_dst), + sizeof(ioat->send_msi_src), 1, true); + + VFIO_ASSERT_EQ(ioat_memcpy_wait(device), 0); +} + +const struct vfio_pci_driver_ops ioat_ops = { + .name = "ioat", + .probe = ioat_probe, + .init = ioat_init, + .remove = ioat_remove, + .memcpy_start = ioat_memcpy_start, + .memcpy_wait = ioat_memcpy_wait, + .send_msi = ioat_send_msi, +}; diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h new file mode 120000 index 000000000000..0b809cfd8fe6 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h @@ -0,0 +1 @@ +../../../../../../../drivers/dma/ioat/registers.h
\ No newline at end of file diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h new file mode 100644 index 000000000000..ed31606e01b7 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H +#define SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H + +#include <fcntl.h> +#include <string.h> +#include <linux/vfio.h> +#include <linux/list.h> +#include <linux/pci_regs.h> + +#include "../../../kselftest.h" + +#define VFIO_LOG_AND_EXIT(...) do { \ + fprintf(stderr, " " __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(KSFT_FAIL); \ +} while (0) + +#define VFIO_ASSERT_OP(_lhs, _rhs, _op, ...) do { \ + typeof(_lhs) __lhs = (_lhs); \ + typeof(_rhs) __rhs = (_rhs); \ + \ + if (__lhs _op __rhs) \ + break; \ + \ + fprintf(stderr, "%s:%u: Assertion Failure\n\n", __FILE__, __LINE__); \ + fprintf(stderr, " Expression: " #_lhs " " #_op " " #_rhs "\n"); \ + fprintf(stderr, " Observed: %#lx %s %#lx\n", \ + (u64)__lhs, #_op, (u64)__rhs); \ + fprintf(stderr, " [errno: %d - %s]\n", errno, strerror(errno)); \ + VFIO_LOG_AND_EXIT(__VA_ARGS__); \ +} while (0) + +#define VFIO_ASSERT_EQ(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, ==, ##__VA_ARGS__) +#define VFIO_ASSERT_NE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, !=, ##__VA_ARGS__) +#define VFIO_ASSERT_LT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <, ##__VA_ARGS__) +#define VFIO_ASSERT_LE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <=, ##__VA_ARGS__) +#define VFIO_ASSERT_GT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >, ##__VA_ARGS__) +#define VFIO_ASSERT_GE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >=, ##__VA_ARGS__) +#define VFIO_ASSERT_TRUE(_a, ...) VFIO_ASSERT_NE(false, (_a), ##__VA_ARGS__) +#define VFIO_ASSERT_FALSE(_a, ...) VFIO_ASSERT_EQ(false, (_a), ##__VA_ARGS__) +#define VFIO_ASSERT_NULL(_a, ...) VFIO_ASSERT_EQ(NULL, _a, ##__VA_ARGS__) +#define VFIO_ASSERT_NOT_NULL(_a, ...) VFIO_ASSERT_NE(NULL, _a, ##__VA_ARGS__) + +#define VFIO_FAIL(_fmt, ...) do { \ + fprintf(stderr, "%s:%u: FAIL\n\n", __FILE__, __LINE__); \ + VFIO_LOG_AND_EXIT(_fmt, ##__VA_ARGS__); \ +} while (0) + +struct vfio_iommu_mode { + const char *name; + const char *container_path; + unsigned long iommu_type; +}; + +/* + * Generator for VFIO selftests fixture variants that replicate across all + * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE() + * which should then use FIXTURE_VARIANT_ADD() to create the variant. + */ +#define FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(...) \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__) + +struct vfio_pci_bar { + struct vfio_region_info info; + void *vaddr; +}; + +typedef u64 iova_t; + +#define INVALID_IOVA UINT64_MAX + +struct vfio_dma_region { + struct list_head link; + void *vaddr; + iova_t iova; + u64 size; +}; + +struct vfio_pci_device; + +struct vfio_pci_driver_ops { + const char *name; + + /** + * @probe() - Check if the driver supports the given device. + * + * Return: 0 on success, non-0 on failure. + */ + int (*probe)(struct vfio_pci_device *device); + + /** + * @init() - Initialize the driver for @device. + * + * Must be called after device->driver.region has been initialized. + */ + void (*init)(struct vfio_pci_device *device); + + /** + * remove() - Deinitialize the driver for @device. + */ + void (*remove)(struct vfio_pci_device *device); + + /** + * memcpy_start() - Kick off @count repeated memcpy operations from + * [@src, @src + @size) to [@dst, @dst + @size). + * + * Guarantees: + * - The device will attempt DMA reads on [src, src + size). + * - The device will attempt DMA writes on [dst, dst + size). + * - The device will not generate any interrupts. + * + * memcpy_start() returns immediately, it does not wait for the + * copies to complete. + */ + void (*memcpy_start)(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, u64 count); + + /** + * memcpy_wait() - Wait until the memcpy operations started by + * memcpy_start() have finished. + * + * Guarantees: + * - All in-flight DMAs initiated by memcpy_start() are fully complete + * before memcpy_wait() returns. + * + * Returns non-0 if the driver detects that an error occurred during the + * memcpy, 0 otherwise. + */ + int (*memcpy_wait)(struct vfio_pci_device *device); + + /** + * send_msi() - Make the device send the MSI device->driver.msi. + * + * Guarantees: + * - The device will send the MSI once. + */ + void (*send_msi)(struct vfio_pci_device *device); +}; + +struct vfio_pci_driver { + const struct vfio_pci_driver_ops *ops; + bool initialized; + bool memcpy_in_progress; + + /* Region to be used by the driver (e.g. for in-memory descriptors) */ + struct vfio_dma_region region; + + /* The maximum size that can be passed to memcpy_start(). */ + u64 max_memcpy_size; + + /* The maximum count that can be passed to memcpy_start(). */ + u64 max_memcpy_count; + + /* The MSI vector the device will signal in ops->send_msi(). */ + int msi; +}; + +struct vfio_pci_device { + int fd; + + const struct vfio_iommu_mode *iommu_mode; + int group_fd; + int container_fd; + + int iommufd; + u32 ioas_id; + + struct vfio_device_info info; + struct vfio_region_info config_space; + struct vfio_pci_bar bars[PCI_STD_NUM_BARS]; + + struct vfio_irq_info msi_info; + struct vfio_irq_info msix_info; + + struct list_head dma_regions; + + /* eventfds for MSI and MSI-x interrupts */ + int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1]; + + struct vfio_pci_driver driver; +}; + +/* + * Return the BDF string of the device that the test should use. + * + * If a BDF string is provided by the user on the command line (as the last + * element of argv[]), then this function will return that and decrement argc + * by 1. + * + * Otherwise this function will attempt to use the environment variable + * $VFIO_SELFTESTS_BDF. + * + * If BDF cannot be determined then the test will exit with KSFT_SKIP. + */ +const char *vfio_selftests_get_bdf(int *argc, char *argv[]); +const char *vfio_pci_get_cdev_path(const char *bdf); + +extern const char *default_iommu_mode; + +struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode); +void vfio_pci_device_cleanup(struct vfio_pci_device *device); +void vfio_pci_device_reset(struct vfio_pci_device *device); + +void vfio_pci_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region); +void vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region); + +void vfio_pci_config_access(struct vfio_pci_device *device, bool write, + size_t config, size_t size, void *data); + +#define vfio_pci_config_read(_device, _offset, _type) ({ \ + _type __data; \ + vfio_pci_config_access((_device), false, _offset, sizeof(__data), &__data); \ + __data; \ +}) + +#define vfio_pci_config_readb(_d, _o) vfio_pci_config_read(_d, _o, u8) +#define vfio_pci_config_readw(_d, _o) vfio_pci_config_read(_d, _o, u16) +#define vfio_pci_config_readl(_d, _o) vfio_pci_config_read(_d, _o, u32) + +#define vfio_pci_config_write(_device, _offset, _value, _type) do { \ + _type __data = (_value); \ + vfio_pci_config_access((_device), true, _offset, sizeof(_type), &__data); \ +} while (0) + +#define vfio_pci_config_writeb(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u8) +#define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16) +#define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32) + +void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, + u32 vector, int count); +void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index); +void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector); + +static inline void fcntl_set_nonblock(int fd) +{ + int r; + + r = fcntl(fd, F_GETFL, 0); + VFIO_ASSERT_NE(r, -1, "F_GETFL failed for fd %d\n", fd); + + r = fcntl(fd, F_SETFL, r | O_NONBLOCK); + VFIO_ASSERT_NE(r, -1, "F_SETFL O_NONBLOCK failed for fd %d\n", fd); +} + +static inline void vfio_pci_msi_enable(struct vfio_pci_device *device, + u32 vector, int count) +{ + vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, vector, count); +} + +static inline void vfio_pci_msi_disable(struct vfio_pci_device *device) +{ + vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX); +} + +static inline void vfio_pci_msix_enable(struct vfio_pci_device *device, + u32 vector, int count) +{ + vfio_pci_irq_enable(device, VFIO_PCI_MSIX_IRQ_INDEX, vector, count); +} + +static inline void vfio_pci_msix_disable(struct vfio_pci_device *device) +{ + vfio_pci_irq_disable(device, VFIO_PCI_MSIX_IRQ_INDEX); +} + +iova_t __to_iova(struct vfio_pci_device *device, void *vaddr); +iova_t to_iova(struct vfio_pci_device *device, void *vaddr); + +static inline bool vfio_pci_device_match(struct vfio_pci_device *device, + u16 vendor_id, u16 device_id) +{ + return (vendor_id == vfio_pci_config_readw(device, PCI_VENDOR_ID)) && + (device_id == vfio_pci_config_readw(device, PCI_DEVICE_ID)); +} + +void vfio_pci_driver_probe(struct vfio_pci_device *device); +void vfio_pci_driver_init(struct vfio_pci_device *device); +void vfio_pci_driver_remove(struct vfio_pci_device *device); +int vfio_pci_driver_memcpy(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size); +void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, + u64 count); +int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device); +void vfio_pci_driver_send_msi(struct vfio_pci_device *device); + +#endif /* SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H */ diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk b/tools/testing/selftests/vfio/lib/libvfio.mk new file mode 100644 index 000000000000..5d11c3a89a28 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/libvfio.mk @@ -0,0 +1,24 @@ +include $(top_srcdir)/scripts/subarch.include +ARCH ?= $(SUBARCH) + +VFIO_DIR := $(selfdir)/vfio + +LIBVFIO_C := lib/vfio_pci_device.c +LIBVFIO_C += lib/vfio_pci_driver.c + +ifeq ($(ARCH:x86_64=x86),x86) +LIBVFIO_C += lib/drivers/ioat/ioat.c +LIBVFIO_C += lib/drivers/dsa/dsa.c +endif + +LIBVFIO_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBVFIO_C)) + +LIBVFIO_O_DIRS := $(shell dirname $(LIBVFIO_O) | uniq) +$(shell mkdir -p $(LIBVFIO_O_DIRS)) + +CFLAGS += -I$(VFIO_DIR)/lib/include + +$(LIBVFIO_O): $(OUTPUT)/%.o : $(VFIO_DIR)/%.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(LIBVFIO_O) diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c new file mode 100644 index 000000000000..0921b2451ba5 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -0,0 +1,594 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <dirent.h> +#include <fcntl.h> +#include <libgen.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/eventfd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <uapi/linux/types.h> +#include <linux/limits.h> +#include <linux/mman.h> +#include <linux/types.h> +#include <linux/vfio.h> +#include <linux/iommufd.h> + +#include "../../../kselftest.h" +#include <vfio_util.h> + +#define PCI_SYSFS_PATH "/sys/bus/pci/devices" + +#define ioctl_assert(_fd, _op, _arg) do { \ + void *__arg = (_arg); \ + int __ret = ioctl((_fd), (_op), (__arg)); \ + VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \ +} while (0) + +iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) +{ + struct vfio_dma_region *region; + + list_for_each_entry(region, &device->dma_regions, link) { + if (vaddr < region->vaddr) + continue; + + if (vaddr >= region->vaddr + region->size) + continue; + + return region->iova + (vaddr - region->vaddr); + } + + return INVALID_IOVA; +} + +iova_t to_iova(struct vfio_pci_device *device, void *vaddr) +{ + iova_t iova; + + iova = __to_iova(device, vaddr); + VFIO_ASSERT_NE(iova, INVALID_IOVA, "%p is not mapped into device.\n", vaddr); + + return iova; +} + +static void vfio_pci_irq_set(struct vfio_pci_device *device, + u32 index, u32 vector, u32 count, int *fds) +{ + u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {}; + struct vfio_irq_set *irq = (void *)&buf; + int *irq_fds = (void *)&irq->data; + + irq->argsz = sizeof(buf); + irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER; + irq->index = index; + irq->start = vector; + irq->count = count; + + if (count) { + irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD; + memcpy(irq_fds, fds, sizeof(int) * count); + } else { + irq->flags |= VFIO_IRQ_SET_DATA_NONE; + } + + ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq); +} + +void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector) +{ + struct vfio_irq_set irq = { + .argsz = sizeof(irq), + .flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE, + .index = index, + .start = vector, + .count = 1, + }; + + ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq); +} + +static void check_supported_irq_index(u32 index) +{ + /* VFIO selftests only supports MSI and MSI-x for now. */ + VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX || + index == VFIO_PCI_MSIX_IRQ_INDEX, + "Unsupported IRQ index: %u\n", index); +} + +void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector, + int count) +{ + int i; + + check_supported_irq_index(index); + + for (i = vector; i < vector + count; i++) { + VFIO_ASSERT_LT(device->msi_eventfds[i], 0); + device->msi_eventfds[i] = eventfd(0, 0); + VFIO_ASSERT_GE(device->msi_eventfds[i], 0); + } + + vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector); +} + +void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index) +{ + int i; + + check_supported_irq_index(index); + + for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) { + if (device->msi_eventfds[i] < 0) + continue; + + VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0); + device->msi_eventfds[i] = -1; + } + + vfio_pci_irq_set(device, index, 0, 0, NULL); +} + +static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index, + struct vfio_irq_info *irq_info) +{ + irq_info->argsz = sizeof(*irq_info); + irq_info->index = index; + + ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info); +} + +static void vfio_iommu_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + struct vfio_iommu_type1_dma_map args = { + .argsz = sizeof(args), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .vaddr = (u64)region->vaddr, + .iova = region->iova, + .size = region->size, + }; + + ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args); +} + +static void iommufd_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + struct iommu_ioas_map args = { + .size = sizeof(args), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .user_va = (u64)region->vaddr, + .iova = region->iova, + .length = region->size, + .ioas_id = device->ioas_id, + }; + + ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args); +} + +void vfio_pci_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + if (device->iommufd) + iommufd_dma_map(device, region); + else + vfio_iommu_dma_map(device, region); + + list_add(®ion->link, &device->dma_regions); +} + +static void vfio_iommu_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + struct vfio_iommu_type1_dma_unmap args = { + .argsz = sizeof(args), + .iova = region->iova, + .size = region->size, + }; + + ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args); +} + +static void iommufd_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + struct iommu_ioas_unmap args = { + .size = sizeof(args), + .iova = region->iova, + .length = region->size, + .ioas_id = device->ioas_id, + }; + + ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args); +} + +void vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + if (device->iommufd) + iommufd_dma_unmap(device, region); + else + vfio_iommu_dma_unmap(device, region); + + list_del(®ion->link); +} + +static void vfio_pci_region_get(struct vfio_pci_device *device, int index, + struct vfio_region_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->argsz = sizeof(*info); + info->index = index; + + ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info); +} + +static void vfio_pci_bar_map(struct vfio_pci_device *device, int index) +{ + struct vfio_pci_bar *bar = &device->bars[index]; + int prot = 0; + + VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS); + VFIO_ASSERT_NULL(bar->vaddr); + VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP); + + if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ) + prot |= PROT_READ; + if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE) + prot |= PROT_WRITE; + + bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED, + device->fd, bar->info.offset); + VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED); +} + +static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index) +{ + struct vfio_pci_bar *bar = &device->bars[index]; + + VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS); + VFIO_ASSERT_NOT_NULL(bar->vaddr); + + VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0); + bar->vaddr = NULL; +} + +static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device) +{ + int i; + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (device->bars[i].vaddr) + vfio_pci_bar_unmap(device, i); + } +} + +void vfio_pci_config_access(struct vfio_pci_device *device, bool write, + size_t config, size_t size, void *data) +{ + struct vfio_region_info *config_space = &device->config_space; + int ret; + + if (write) + ret = pwrite(device->fd, data, size, config_space->offset + config); + else + ret = pread(device->fd, data, size, config_space->offset + config); + + VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n", + write ? "write to" : "read from", config); +} + +void vfio_pci_device_reset(struct vfio_pci_device *device) +{ + ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL); +} + +static unsigned int vfio_pci_get_group_from_dev(const char *bdf) +{ + char dev_iommu_group_path[PATH_MAX] = {0}; + char sysfs_path[PATH_MAX] = {0}; + unsigned int group; + int ret; + + snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf); + + ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path)); + VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf); + + ret = sscanf(basename(dev_iommu_group_path), "%u", &group); + VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf); + + return group; +} + +static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf) +{ + struct vfio_group_status group_status = { + .argsz = sizeof(group_status), + }; + char group_path[32]; + int group; + + group = vfio_pci_get_group_from_dev(bdf); + snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group); + + device->group_fd = open(group_path, O_RDWR); + VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path); + + ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status); + VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE); + + ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd); +} + +static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf) +{ + unsigned long iommu_type = device->iommu_mode->iommu_type; + const char *path = device->iommu_mode->container_path; + int version; + int ret; + + device->container_fd = open(path, O_RDWR); + VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path); + + version = ioctl(device->container_fd, VFIO_GET_API_VERSION); + VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version); + + vfio_pci_group_setup(device, bdf); + + ret = ioctl(device->container_fd, VFIO_CHECK_EXTENSION, iommu_type); + VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type); + + ioctl_assert(device->container_fd, VFIO_SET_IOMMU, (void *)iommu_type); + + device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf); + VFIO_ASSERT_GE(device->fd, 0); +} + +static void vfio_pci_device_setup(struct vfio_pci_device *device) +{ + int i; + + device->info.argsz = sizeof(device->info); + ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info); + + vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space); + + /* Sanity check VFIO does not advertise mmap for config space */ + VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP), + "PCI config space should not support mmap()\n"); + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + struct vfio_pci_bar *bar = device->bars + i; + + vfio_pci_region_get(device, i, &bar->info); + if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP) + vfio_pci_bar_map(device, i); + } + + vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info); + vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info); + + for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) + device->msi_eventfds[i] = -1; +} + +const char *vfio_pci_get_cdev_path(const char *bdf) +{ + char dir_path[PATH_MAX]; + struct dirent *entry; + char *cdev_path; + DIR *dir; + + cdev_path = calloc(PATH_MAX, 1); + VFIO_ASSERT_NOT_NULL(cdev_path); + + snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf); + + dir = opendir(dir_path); + VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path); + + while ((entry = readdir(dir)) != NULL) { + /* Find the file that starts with "vfio" */ + if (strncmp("vfio", entry->d_name, 4)) + continue; + + snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name); + break; + } + + VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n"); + VFIO_ASSERT_EQ(closedir(dir), 0); + + return cdev_path; +} + +/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */ +static const struct vfio_iommu_mode iommu_modes[] = { + { + .name = "vfio_type1_iommu", + .container_path = "/dev/vfio/vfio", + .iommu_type = VFIO_TYPE1_IOMMU, + }, + { + .name = "vfio_type1v2_iommu", + .container_path = "/dev/vfio/vfio", + .iommu_type = VFIO_TYPE1v2_IOMMU, + }, + { + .name = "iommufd_compat_type1", + .container_path = "/dev/iommu", + .iommu_type = VFIO_TYPE1_IOMMU, + }, + { + .name = "iommufd_compat_type1v2", + .container_path = "/dev/iommu", + .iommu_type = VFIO_TYPE1v2_IOMMU, + }, + { + .name = "iommufd", + }, +}; + +const char *default_iommu_mode = "iommufd"; + +static const struct vfio_iommu_mode *lookup_iommu_mode(const char *iommu_mode) +{ + int i; + + if (!iommu_mode) + iommu_mode = default_iommu_mode; + + for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) { + if (strcmp(iommu_mode, iommu_modes[i].name)) + continue; + + return &iommu_modes[i]; + } + + VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode); +} + +static void vfio_device_bind_iommufd(int device_fd, int iommufd) +{ + struct vfio_device_bind_iommufd args = { + .argsz = sizeof(args), + .iommufd = iommufd, + }; + + ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args); +} + +static u32 iommufd_ioas_alloc(int iommufd) +{ + struct iommu_ioas_alloc args = { + .size = sizeof(args), + }; + + ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args); + return args.out_ioas_id; +} + +static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id) +{ + struct vfio_device_attach_iommufd_pt args = { + .argsz = sizeof(args), + .pt_id = pt_id, + }; + + ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args); +} + +static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf) +{ + const char *cdev_path = vfio_pci_get_cdev_path(bdf); + + device->fd = open(cdev_path, O_RDWR); + VFIO_ASSERT_GE(device->fd, 0); + free((void *)cdev_path); + + /* + * Require device->iommufd to be >0 so that a simple non-0 check can be + * used to check if iommufd is enabled. In practice open() will never + * return 0 unless stdin is closed. + */ + device->iommufd = open("/dev/iommu", O_RDWR); + VFIO_ASSERT_GT(device->iommufd, 0); + + vfio_device_bind_iommufd(device->fd, device->iommufd); + device->ioas_id = iommufd_ioas_alloc(device->iommufd); + vfio_device_attach_iommufd_pt(device->fd, device->ioas_id); +} + +struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode) +{ + struct vfio_pci_device *device; + + device = calloc(1, sizeof(*device)); + VFIO_ASSERT_NOT_NULL(device); + + INIT_LIST_HEAD(&device->dma_regions); + + device->iommu_mode = lookup_iommu_mode(iommu_mode); + + if (device->iommu_mode->container_path) + vfio_pci_container_setup(device, bdf); + else + vfio_pci_iommufd_setup(device, bdf); + + vfio_pci_device_setup(device); + vfio_pci_driver_probe(device); + + return device; +} + +void vfio_pci_device_cleanup(struct vfio_pci_device *device) +{ + int i; + + if (device->driver.initialized) + vfio_pci_driver_remove(device); + + vfio_pci_bar_unmap_all(device); + + VFIO_ASSERT_EQ(close(device->fd), 0); + + for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) { + if (device->msi_eventfds[i] < 0) + continue; + + VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0); + } + + if (device->iommufd) { + VFIO_ASSERT_EQ(close(device->iommufd), 0); + } else { + VFIO_ASSERT_EQ(close(device->group_fd), 0); + VFIO_ASSERT_EQ(close(device->container_fd), 0); + } + + free(device); +} + +static bool is_bdf(const char *str) +{ + unsigned int s, b, d, f; + int length, count; + + count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length); + return count == 4 && length == strlen(str); +} + +const char *vfio_selftests_get_bdf(int *argc, char *argv[]) +{ + char *bdf; + + if (*argc > 1 && is_bdf(argv[*argc - 1])) + return argv[--(*argc)]; + + bdf = getenv("VFIO_SELFTESTS_BDF"); + if (bdf) { + VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf); + return bdf; + } + + fprintf(stderr, "Unable to determine which device to use, skipping test.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "To pass the device address via environment variable:\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " export VFIO_SELFTESTS_BDF=segment:bus:device.function\n"); + fprintf(stderr, " %s [options]\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "To pass the device address via argv:\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " %s [options] segment:bus:device.function\n", argv[0]); + fprintf(stderr, "\n"); + exit(KSFT_SKIP); +} diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c new file mode 100644 index 000000000000..e5e8723ecb41 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdio.h> + +#include "../../../kselftest.h" +#include <vfio_util.h> + +#ifdef __x86_64__ +extern struct vfio_pci_driver_ops dsa_ops; +extern struct vfio_pci_driver_ops ioat_ops; +#endif + +static struct vfio_pci_driver_ops *driver_ops[] = { +#ifdef __x86_64__ + &dsa_ops, + &ioat_ops, +#endif +}; + +void vfio_pci_driver_probe(struct vfio_pci_device *device) +{ + struct vfio_pci_driver_ops *ops; + int i; + + VFIO_ASSERT_NULL(device->driver.ops); + + for (i = 0; i < ARRAY_SIZE(driver_ops); i++) { + ops = driver_ops[i]; + + if (ops->probe(device)) + continue; + + printf("Driver found: %s\n", ops->name); + device->driver.ops = ops; + } +} + +static void vfio_check_driver_op(struct vfio_pci_driver *driver, void *op, + const char *op_name) +{ + VFIO_ASSERT_NOT_NULL(driver->ops); + VFIO_ASSERT_NOT_NULL(op, "Driver has no %s()\n", op_name); + VFIO_ASSERT_EQ(driver->initialized, op != driver->ops->init); + VFIO_ASSERT_EQ(driver->memcpy_in_progress, op == driver->ops->memcpy_wait); +} + +#define VFIO_CHECK_DRIVER_OP(_driver, _op) do { \ + struct vfio_pci_driver *__driver = (_driver); \ + vfio_check_driver_op(__driver, __driver->ops->_op, #_op); \ +} while (0) + +void vfio_pci_driver_init(struct vfio_pci_device *device) +{ + struct vfio_pci_driver *driver = &device->driver; + + VFIO_ASSERT_NOT_NULL(driver->region.vaddr); + VFIO_CHECK_DRIVER_OP(driver, init); + + driver->ops->init(device); + + driver->initialized = true; + + printf("%s: region: vaddr %p, iova 0x%lx, size 0x%lx\n", + driver->ops->name, + driver->region.vaddr, + driver->region.iova, + driver->region.size); + + printf("%s: max_memcpy_size 0x%lx, max_memcpy_count 0x%lx\n", + driver->ops->name, + driver->max_memcpy_size, + driver->max_memcpy_count); +} + +void vfio_pci_driver_remove(struct vfio_pci_device *device) +{ + struct vfio_pci_driver *driver = &device->driver; + + VFIO_CHECK_DRIVER_OP(driver, remove); + + driver->ops->remove(device); + driver->initialized = false; +} + +void vfio_pci_driver_send_msi(struct vfio_pci_device *device) +{ + struct vfio_pci_driver *driver = &device->driver; + + VFIO_CHECK_DRIVER_OP(driver, send_msi); + + driver->ops->send_msi(device); +} + +void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, + u64 count) +{ + struct vfio_pci_driver *driver = &device->driver; + + VFIO_ASSERT_LE(size, driver->max_memcpy_size); + VFIO_ASSERT_LE(count, driver->max_memcpy_count); + VFIO_CHECK_DRIVER_OP(driver, memcpy_start); + + driver->ops->memcpy_start(device, src, dst, size, count); + driver->memcpy_in_progress = true; +} + +int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device) +{ + struct vfio_pci_driver *driver = &device->driver; + int r; + + VFIO_CHECK_DRIVER_OP(driver, memcpy_wait); + + r = driver->ops->memcpy_wait(device); + driver->memcpy_in_progress = false; + + return r; +} + +int vfio_pci_driver_memcpy(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size) +{ + vfio_pci_driver_memcpy_start(device, src, dst, size, 1); + + return vfio_pci_driver_memcpy_wait(device); +} diff --git a/tools/testing/selftests/vfio/run.sh b/tools/testing/selftests/vfio/run.sh new file mode 100755 index 000000000000..0476b6d7adc3 --- /dev/null +++ b/tools/testing/selftests/vfio/run.sh @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +# Global variables initialized in main() and then used during cleanup() when +# the script exits. +declare DEVICE_BDF +declare NEW_DRIVER +declare OLD_DRIVER +declare OLD_NUMVFS +declare DRIVER_OVERRIDE + +function write_to() { + # Unfortunately set -x does not show redirects so use echo to manually + # tell the user what commands are being run. + echo "+ echo \"${2}\" > ${1}" + echo "${2}" > ${1} +} + +function bind() { + write_to /sys/bus/pci/drivers/${2}/bind ${1} +} + +function unbind() { + write_to /sys/bus/pci/drivers/${2}/unbind ${1} +} + +function set_sriov_numvfs() { + write_to /sys/bus/pci/devices/${1}/sriov_numvfs ${2} +} + +function set_driver_override() { + write_to /sys/bus/pci/devices/${1}/driver_override ${2} +} + +function clear_driver_override() { + set_driver_override ${1} "" +} + +function cleanup() { + if [ "${NEW_DRIVER}" ]; then unbind ${DEVICE_BDF} ${NEW_DRIVER} ; fi + if [ "${DRIVER_OVERRIDE}" ]; then clear_driver_override ${DEVICE_BDF} ; fi + if [ "${OLD_DRIVER}" ]; then bind ${DEVICE_BDF} ${OLD_DRIVER} ; fi + if [ "${OLD_NUMVFS}" ]; then set_sriov_numvfs ${DEVICE_BDF} ${OLD_NUMVFS} ; fi +} + +function usage() { + echo "usage: $0 [-d segment:bus:device.function] [-s] [-h] [cmd ...]" >&2 + echo >&2 + echo " -d: The BDF of the device to use for the test (required)" >&2 + echo " -h: Show this help message" >&2 + echo " -s: Drop into a shell rather than running a command" >&2 + echo >&2 + echo " cmd: The command to run and arguments to pass to it." >&2 + echo " Required when not using -s. The SBDF will be " >&2 + echo " appended to the argument list." >&2 + exit 1 +} + +function main() { + local shell + + while getopts "d:hs" opt; do + case $opt in + d) DEVICE_BDF="$OPTARG" ;; + s) shell=true ;; + *) usage ;; + esac + done + + # Shift past all optional arguments. + shift $((OPTIND - 1)) + + # Check that the user passed in the command to run. + [ ! "${shell}" ] && [ $# = 0 ] && usage + + # Check that the user passed in a BDF. + [ "${DEVICE_BDF}" ] || usage + + trap cleanup EXIT + set -e + + test -d /sys/bus/pci/devices/${DEVICE_BDF} + + if [ -f /sys/bus/pci/devices/${DEVICE_BDF}/sriov_numvfs ]; then + OLD_NUMVFS=$(cat /sys/bus/pci/devices/${DEVICE_BDF}/sriov_numvfs) + set_sriov_numvfs ${DEVICE_BDF} 0 + fi + + if [ -L /sys/bus/pci/devices/${DEVICE_BDF}/driver ]; then + OLD_DRIVER=$(basename $(readlink -m /sys/bus/pci/devices/${DEVICE_BDF}/driver)) + unbind ${DEVICE_BDF} ${OLD_DRIVER} + fi + + set_driver_override ${DEVICE_BDF} vfio-pci + DRIVER_OVERRIDE=true + + bind ${DEVICE_BDF} vfio-pci + NEW_DRIVER=vfio-pci + + echo + if [ "${shell}" ]; then + echo "Dropping into ${SHELL} with VFIO_SELFTESTS_BDF=${DEVICE_BDF}" + VFIO_SELFTESTS_BDF=${DEVICE_BDF} ${SHELL} + else + "$@" ${DEVICE_BDF} + fi + echo +} + +main "$@" diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c new file mode 100644 index 000000000000..ab19c54a774d --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <linux/limits.h> +#include <linux/mman.h> +#include <linux/sizes.h> +#include <linux/vfio.h> + +#include <vfio_util.h> + +#include "../kselftest_harness.h" + +static const char *device_bdf; + +struct iommu_mapping { + u64 pgd; + u64 p4d; + u64 pud; + u64 pmd; + u64 pte; +}; + +static void parse_next_value(char **line, u64 *value) +{ + char *token; + + token = strtok_r(*line, " \t|\n", line); + if (!token) + return; + + /* Caller verifies `value`. No need to check return value. */ + sscanf(token, "0x%lx", value); +} + +static int intel_iommu_mapping_get(const char *bdf, u64 iova, + struct iommu_mapping *mapping) +{ + char iommu_mapping_path[PATH_MAX], line[PATH_MAX]; + u64 line_iova = -1; + int ret = -ENOENT; + FILE *file; + char *rest; + + snprintf(iommu_mapping_path, sizeof(iommu_mapping_path), + "/sys/kernel/debug/iommu/intel/%s/domain_translation_struct", + bdf); + + printf("Searching for IOVA 0x%lx in %s\n", iova, iommu_mapping_path); + + file = fopen(iommu_mapping_path, "r"); + VFIO_ASSERT_NOT_NULL(file, "fopen(%s) failed", iommu_mapping_path); + + while (fgets(line, sizeof(line), file)) { + rest = line; + + parse_next_value(&rest, &line_iova); + if (line_iova != (iova / getpagesize())) + continue; + + /* + * Ensure each struct field is initialized in case of empty + * page table values. + */ + memset(mapping, 0, sizeof(*mapping)); + parse_next_value(&rest, &mapping->pgd); + parse_next_value(&rest, &mapping->p4d); + parse_next_value(&rest, &mapping->pud); + parse_next_value(&rest, &mapping->pmd); + parse_next_value(&rest, &mapping->pte); + + ret = 0; + break; + } + + fclose(file); + + if (ret) + printf("IOVA not found\n"); + + return ret; +} + +static int iommu_mapping_get(const char *bdf, u64 iova, + struct iommu_mapping *mapping) +{ + if (!access("/sys/kernel/debug/iommu/intel", F_OK)) + return intel_iommu_mapping_get(bdf, iova, mapping); + + return -EOPNOTSUPP; +} + +FIXTURE(vfio_dma_mapping_test) { + struct vfio_pci_device *device; +}; + +FIXTURE_VARIANT(vfio_dma_mapping_test) { + const char *iommu_mode; + u64 size; + int mmap_flags; +}; + +#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode, _name, _size, _mmap_flags) \ +FIXTURE_VARIANT_ADD(vfio_dma_mapping_test, _iommu_mode ## _ ## _name) { \ + .iommu_mode = #_iommu_mode, \ + .size = (_size), \ + .mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE | (_mmap_flags), \ +} + +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous, 0, 0); +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_2mb, SZ_2M, MAP_HUGETLB | MAP_HUGE_2MB); +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB); + +FIXTURE_SETUP(vfio_dma_mapping_test) +{ + self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); +} + +FIXTURE_TEARDOWN(vfio_dma_mapping_test) +{ + vfio_pci_device_cleanup(self->device); +} + +TEST_F(vfio_dma_mapping_test, dma_map_unmap) +{ + const u64 size = variant->size ?: getpagesize(); + const int flags = variant->mmap_flags; + struct vfio_dma_region region; + struct iommu_mapping mapping; + u64 mapping_size = size; + int rc; + + region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + + /* Skip the test if there aren't enough HugeTLB pages available. */ + if (flags & MAP_HUGETLB && region.vaddr == MAP_FAILED) + SKIP(return, "mmap() failed: %s (%d)\n", strerror(errno), errno); + else + ASSERT_NE(region.vaddr, MAP_FAILED); + + region.iova = (u64)region.vaddr; + region.size = size; + + vfio_pci_dma_map(self->device, ®ion); + printf("Mapped HVA %p (size 0x%lx) at IOVA 0x%lx\n", region.vaddr, size, region.iova); + + ASSERT_EQ(region.iova, to_iova(self->device, region.vaddr)); + + rc = iommu_mapping_get(device_bdf, region.iova, &mapping); + if (rc == -EOPNOTSUPP) + goto unmap; + + /* + * IOMMUFD compatibility-mode does not support huge mappings when + * using VFIO_TYPE1_IOMMU. + */ + if (!strcmp(variant->iommu_mode, "iommufd_compat_type1")) + mapping_size = SZ_4K; + + ASSERT_EQ(0, rc); + printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova); + printf("PGD: 0x%016lx\n", mapping.pgd); + printf("P4D: 0x%016lx\n", mapping.p4d); + printf("PUD: 0x%016lx\n", mapping.pud); + printf("PMD: 0x%016lx\n", mapping.pmd); + printf("PTE: 0x%016lx\n", mapping.pte); + + switch (mapping_size) { + case SZ_4K: + ASSERT_NE(0, mapping.pte); + break; + case SZ_2M: + ASSERT_EQ(0, mapping.pte); + ASSERT_NE(0, mapping.pmd); + break; + case SZ_1G: + ASSERT_EQ(0, mapping.pte); + ASSERT_EQ(0, mapping.pmd); + ASSERT_NE(0, mapping.pud); + break; + default: + VFIO_FAIL("Unrecognized size: 0x%lx\n", mapping_size); + } + +unmap: + vfio_pci_dma_unmap(self->device, ®ion); + printf("Unmapped IOVA 0x%lx\n", region.iova); + ASSERT_EQ(INVALID_IOVA, __to_iova(self->device, region.vaddr)); + ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping)); + + ASSERT_TRUE(!munmap(region.vaddr, size)); +} + +int main(int argc, char *argv[]) +{ + device_bdf = vfio_selftests_get_bdf(&argc, argv); + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c new file mode 100644 index 000000000000..3655106b912d --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <uapi/linux/types.h> +#include <linux/limits.h> +#include <linux/sizes.h> +#include <linux/vfio.h> +#include <linux/iommufd.h> + +#include <stdint.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include <vfio_util.h> +#include "../kselftest_harness.h" + +static const char iommu_dev_path[] = "/dev/iommu"; +static const char *cdev_path; + +static int vfio_device_bind_iommufd_ioctl(int cdev_fd, int iommufd) +{ + struct vfio_device_bind_iommufd bind_args = { + .argsz = sizeof(bind_args), + .iommufd = iommufd, + }; + + return ioctl(cdev_fd, VFIO_DEVICE_BIND_IOMMUFD, &bind_args); +} + +static int vfio_device_get_info_ioctl(int cdev_fd) +{ + struct vfio_device_info info_args = { .argsz = sizeof(info_args) }; + + return ioctl(cdev_fd, VFIO_DEVICE_GET_INFO, &info_args); +} + +static int vfio_device_ioas_alloc_ioctl(int iommufd, struct iommu_ioas_alloc *alloc_args) +{ + *alloc_args = (struct iommu_ioas_alloc){ + .size = sizeof(struct iommu_ioas_alloc), + }; + + return ioctl(iommufd, IOMMU_IOAS_ALLOC, alloc_args); +} + +static int vfio_device_attach_iommufd_pt_ioctl(int cdev_fd, u32 pt_id) +{ + struct vfio_device_attach_iommufd_pt attach_args = { + .argsz = sizeof(attach_args), + .pt_id = pt_id, + }; + + return ioctl(cdev_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_args); +} + +static int vfio_device_detach_iommufd_pt_ioctl(int cdev_fd) +{ + struct vfio_device_detach_iommufd_pt detach_args = { + .argsz = sizeof(detach_args), + }; + + return ioctl(cdev_fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_args); +} + +FIXTURE(vfio_cdev) { + int cdev_fd; + int iommufd; +}; + +FIXTURE_SETUP(vfio_cdev) +{ + ASSERT_LE(0, (self->cdev_fd = open(cdev_path, O_RDWR, 0))); + ASSERT_LE(0, (self->iommufd = open(iommu_dev_path, O_RDWR, 0))); +} + +FIXTURE_TEARDOWN(vfio_cdev) +{ + ASSERT_EQ(0, close(self->cdev_fd)); + ASSERT_EQ(0, close(self->iommufd)); +} + +TEST_F(vfio_cdev, bind) +{ + ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); + ASSERT_EQ(0, vfio_device_get_info_ioctl(self->cdev_fd)); +} + +TEST_F(vfio_cdev, get_info_without_bind_fails) +{ + ASSERT_NE(0, vfio_device_get_info_ioctl(self->cdev_fd)); +} + +TEST_F(vfio_cdev, bind_bad_iommufd_fails) +{ + ASSERT_NE(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, -2)); +} + +TEST_F(vfio_cdev, repeated_bind_fails) +{ + ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); + ASSERT_NE(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); +} + +TEST_F(vfio_cdev, attach_detatch_pt) +{ + struct iommu_ioas_alloc alloc_args; + + ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); + ASSERT_EQ(0, vfio_device_ioas_alloc_ioctl(self->iommufd, &alloc_args)); + ASSERT_EQ(0, vfio_device_attach_iommufd_pt_ioctl(self->cdev_fd, alloc_args.out_ioas_id)); + ASSERT_EQ(0, vfio_device_detach_iommufd_pt_ioctl(self->cdev_fd)); +} + +TEST_F(vfio_cdev, attach_invalid_pt_fails) +{ + ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); + ASSERT_NE(0, vfio_device_attach_iommufd_pt_ioctl(self->cdev_fd, UINT32_MAX)); +} + +int main(int argc, char *argv[]) +{ + const char *device_bdf = vfio_selftests_get_bdf(&argc, argv); + + cdev_path = vfio_pci_get_cdev_path(device_bdf); + printf("Using cdev device %s\n", cdev_path); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/vfio/vfio_pci_device_test.c b/tools/testing/selftests/vfio/vfio_pci_device_test.c new file mode 100644 index 000000000000..7a270698e4d2 --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_pci_device_test.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <fcntl.h> +#include <stdlib.h> + +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <linux/limits.h> +#include <linux/pci_regs.h> +#include <linux/sizes.h> +#include <linux/vfio.h> + +#include <vfio_util.h> + +#include "../kselftest_harness.h" + +static const char *device_bdf; + +/* + * Limit the number of MSIs enabled/disabled by the test regardless of the + * number of MSIs the device itself supports, e.g. to avoid hitting IRTE limits. + */ +#define MAX_TEST_MSI 16U + +FIXTURE(vfio_pci_device_test) { + struct vfio_pci_device *device; +}; + +FIXTURE_SETUP(vfio_pci_device_test) +{ + self->device = vfio_pci_device_init(device_bdf, default_iommu_mode); +} + +FIXTURE_TEARDOWN(vfio_pci_device_test) +{ + vfio_pci_device_cleanup(self->device); +} + +#define read_pci_id_from_sysfs(_file) ({ \ + char __sysfs_path[PATH_MAX]; \ + char __buf[32]; \ + int __fd; \ + \ + snprintf(__sysfs_path, PATH_MAX, "/sys/bus/pci/devices/%s/%s", device_bdf, _file); \ + ASSERT_GT((__fd = open(__sysfs_path, O_RDONLY)), 0); \ + ASSERT_GT(read(__fd, __buf, ARRAY_SIZE(__buf)), 0); \ + ASSERT_EQ(0, close(__fd)); \ + (u16)strtoul(__buf, NULL, 0); \ +}) + +TEST_F(vfio_pci_device_test, config_space_read_write) +{ + u16 vendor, device; + u16 command; + + /* Check that Vendor and Device match what the kernel reports. */ + vendor = read_pci_id_from_sysfs("vendor"); + device = read_pci_id_from_sysfs("device"); + ASSERT_TRUE(vfio_pci_device_match(self->device, vendor, device)); + + printf("Vendor: %04x, Device: %04x\n", vendor, device); + + command = vfio_pci_config_readw(self->device, PCI_COMMAND); + ASSERT_FALSE(command & PCI_COMMAND_MASTER); + + vfio_pci_config_writew(self->device, PCI_COMMAND, command | PCI_COMMAND_MASTER); + command = vfio_pci_config_readw(self->device, PCI_COMMAND); + ASSERT_TRUE(command & PCI_COMMAND_MASTER); + printf("Enabled Bus Mastering (command: %04x)\n", command); + + vfio_pci_config_writew(self->device, PCI_COMMAND, command & ~PCI_COMMAND_MASTER); + command = vfio_pci_config_readw(self->device, PCI_COMMAND); + ASSERT_FALSE(command & PCI_COMMAND_MASTER); + printf("Disabled Bus Mastering (command: %04x)\n", command); +} + +TEST_F(vfio_pci_device_test, validate_bars) +{ + struct vfio_pci_bar *bar; + int i; + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + bar = &self->device->bars[i]; + + if (!(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) { + printf("BAR %d does not support mmap()\n", i); + ASSERT_EQ(NULL, bar->vaddr); + continue; + } + + /* + * BARs that support mmap() should be automatically mapped by + * vfio_pci_device_init(). + */ + ASSERT_NE(NULL, bar->vaddr); + ASSERT_NE(0, bar->info.size); + printf("BAR %d mapped at %p (size 0x%llx)\n", i, bar->vaddr, bar->info.size); + } +} + +FIXTURE(vfio_pci_irq_test) { + struct vfio_pci_device *device; +}; + +FIXTURE_VARIANT(vfio_pci_irq_test) { + int irq_index; +}; + +FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msi) { + .irq_index = VFIO_PCI_MSI_IRQ_INDEX, +}; + +FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msix) { + .irq_index = VFIO_PCI_MSIX_IRQ_INDEX, +}; + +FIXTURE_SETUP(vfio_pci_irq_test) +{ + self->device = vfio_pci_device_init(device_bdf, default_iommu_mode); +} + +FIXTURE_TEARDOWN(vfio_pci_irq_test) +{ + vfio_pci_device_cleanup(self->device); +} + +TEST_F(vfio_pci_irq_test, enable_trigger_disable) +{ + bool msix = variant->irq_index == VFIO_PCI_MSIX_IRQ_INDEX; + int msi_eventfd; + u32 count; + u64 value; + int i; + + if (msix) + count = self->device->msix_info.count; + else + count = self->device->msi_info.count; + + count = min(count, MAX_TEST_MSI); + + if (!count) + SKIP(return, "MSI%s: not supported\n", msix ? "-x" : ""); + + vfio_pci_irq_enable(self->device, variant->irq_index, 0, count); + printf("MSI%s: enabled %d interrupts\n", msix ? "-x" : "", count); + + for (i = 0; i < count; i++) { + msi_eventfd = self->device->msi_eventfds[i]; + + fcntl_set_nonblock(msi_eventfd); + ASSERT_EQ(-1, read(msi_eventfd, &value, 8)); + ASSERT_EQ(EAGAIN, errno); + + vfio_pci_irq_trigger(self->device, variant->irq_index, i); + + ASSERT_EQ(8, read(msi_eventfd, &value, 8)); + ASSERT_EQ(1, value); + } + + vfio_pci_irq_disable(self->device, variant->irq_index); +} + +TEST_F(vfio_pci_device_test, reset) +{ + if (!(self->device->info.flags & VFIO_DEVICE_FLAGS_RESET)) + SKIP(return, "Device does not support reset\n"); + + vfio_pci_device_reset(self->device); +} + +int main(int argc, char *argv[]) +{ + device_bdf = vfio_selftests_get_bdf(&argc, argv); + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c new file mode 100644 index 000000000000..2dbd70b7db62 --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <linux/sizes.h> +#include <linux/vfio.h> + +#include <vfio_util.h> + +#include "../kselftest_harness.h" + +static const char *device_bdf; + +#define ASSERT_NO_MSI(_eventfd) do { \ + u64 __value; \ + \ + ASSERT_EQ(-1, read(_eventfd, &__value, 8)); \ + ASSERT_EQ(EAGAIN, errno); \ +} while (0) + +static void region_setup(struct vfio_pci_device *device, + struct vfio_dma_region *region, u64 size) +{ + const int flags = MAP_SHARED | MAP_ANONYMOUS; + const int prot = PROT_READ | PROT_WRITE; + void *vaddr; + + vaddr = mmap(NULL, size, prot, flags, -1, 0); + VFIO_ASSERT_NE(vaddr, MAP_FAILED); + + region->vaddr = vaddr; + region->iova = (u64)vaddr; + region->size = size; + + vfio_pci_dma_map(device, region); +} + +static void region_teardown(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + vfio_pci_dma_unmap(device, region); + VFIO_ASSERT_EQ(munmap(region->vaddr, region->size), 0); +} + +FIXTURE(vfio_pci_driver_test) { + struct vfio_pci_device *device; + struct vfio_dma_region memcpy_region; + void *vaddr; + int msi_fd; + + u64 size; + void *src; + void *dst; + iova_t src_iova; + iova_t dst_iova; + iova_t unmapped_iova; +}; + +FIXTURE_VARIANT(vfio_pci_driver_test) { + const char *iommu_mode; +}; + +#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \ +FIXTURE_VARIANT_ADD(vfio_pci_driver_test, _iommu_mode) { \ + .iommu_mode = #_iommu_mode, \ +} + +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(); + +FIXTURE_SETUP(vfio_pci_driver_test) +{ + struct vfio_pci_driver *driver; + + self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + + driver = &self->device->driver; + + region_setup(self->device, &self->memcpy_region, SZ_1G); + region_setup(self->device, &driver->region, SZ_2M); + + /* Any IOVA that doesn't overlap memcpy_region and driver->region. */ + self->unmapped_iova = 8UL * SZ_1G; + + vfio_pci_driver_init(self->device); + self->msi_fd = self->device->msi_eventfds[driver->msi]; + + /* + * Use the maximum size supported by the device for memcpy operations, + * slimmed down to fit into the memcpy region (divided by 2 so src and + * dst regions do not overlap). + */ + self->size = self->device->driver.max_memcpy_size; + self->size = min(self->size, self->memcpy_region.size / 2); + + self->src = self->memcpy_region.vaddr; + self->dst = self->src + self->size; + + self->src_iova = to_iova(self->device, self->src); + self->dst_iova = to_iova(self->device, self->dst); +} + +FIXTURE_TEARDOWN(vfio_pci_driver_test) +{ + struct vfio_pci_driver *driver = &self->device->driver; + + vfio_pci_driver_remove(self->device); + + region_teardown(self->device, &self->memcpy_region); + region_teardown(self->device, &driver->region); + + vfio_pci_device_cleanup(self->device); +} + +TEST_F(vfio_pci_driver_test, init_remove) +{ + int i; + + for (i = 0; i < 10; i++) { + vfio_pci_driver_remove(self->device); + vfio_pci_driver_init(self->device); + } +} + +TEST_F(vfio_pci_driver_test, memcpy_success) +{ + fcntl_set_nonblock(self->msi_fd); + + memset(self->src, 'x', self->size); + memset(self->dst, 'y', self->size); + + ASSERT_EQ(0, vfio_pci_driver_memcpy(self->device, + self->src_iova, + self->dst_iova, + self->size)); + + ASSERT_EQ(0, memcmp(self->src, self->dst, self->size)); + ASSERT_NO_MSI(self->msi_fd); +} + +TEST_F(vfio_pci_driver_test, memcpy_from_unmapped_iova) +{ + fcntl_set_nonblock(self->msi_fd); + + /* + * Ignore the return value since not all devices will detect and report + * accesses to unmapped IOVAs as errors. + */ + vfio_pci_driver_memcpy(self->device, self->unmapped_iova, + self->dst_iova, self->size); + + ASSERT_NO_MSI(self->msi_fd); +} + +TEST_F(vfio_pci_driver_test, memcpy_to_unmapped_iova) +{ + fcntl_set_nonblock(self->msi_fd); + + /* + * Ignore the return value since not all devices will detect and report + * accesses to unmapped IOVAs as errors. + */ + vfio_pci_driver_memcpy(self->device, self->src_iova, + self->unmapped_iova, self->size); + + ASSERT_NO_MSI(self->msi_fd); +} + +TEST_F(vfio_pci_driver_test, send_msi) +{ + u64 value; + + vfio_pci_driver_send_msi(self->device); + ASSERT_EQ(8, read(self->msi_fd, &value, 8)); + ASSERT_EQ(1, value); +} + +TEST_F(vfio_pci_driver_test, mix_and_match) +{ + u64 value; + int i; + + for (i = 0; i < 10; i++) { + memset(self->src, 'x', self->size); + memset(self->dst, 'y', self->size); + + ASSERT_EQ(0, vfio_pci_driver_memcpy(self->device, + self->src_iova, + self->dst_iova, + self->size)); + + ASSERT_EQ(0, memcmp(self->src, self->dst, self->size)); + + vfio_pci_driver_memcpy(self->device, + self->unmapped_iova, + self->dst_iova, + self->size); + + vfio_pci_driver_send_msi(self->device); + ASSERT_EQ(8, read(self->msi_fd, &value, 8)); + ASSERT_EQ(1, value); + } +} + +TEST_F_TIMEOUT(vfio_pci_driver_test, memcpy_storm, 60) +{ + struct vfio_pci_driver *driver = &self->device->driver; + u64 total_size; + u64 count; + + fcntl_set_nonblock(self->msi_fd); + + /* + * Perform up to 250GiB worth of DMA reads and writes across several + * memcpy operations. Some devices can support even more but the test + * will take too long. + */ + total_size = 250UL * SZ_1G; + count = min(total_size / self->size, driver->max_memcpy_count); + + printf("Kicking off %lu memcpys of size 0x%lx\n", count, self->size); + vfio_pci_driver_memcpy_start(self->device, + self->src_iova, + self->dst_iova, + self->size, count); + + ASSERT_EQ(0, vfio_pci_driver_memcpy_wait(self->device)); + ASSERT_NO_MSI(self->msi_fd); +} + +int main(int argc, char *argv[]) +{ + struct vfio_pci_device *device; + + device_bdf = vfio_selftests_get_bdf(&argc, argv); + + device = vfio_pci_device_init(device_bdf, default_iommu_mode); + if (!device->driver.ops) { + fprintf(stderr, "No driver found for device %s\n", device_bdf); + return KSFT_SKIP; + } + vfio_pci_device_cleanup(device); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index a1f506ba5578..4f09c5db0c7f 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -332,6 +332,12 @@ int main(int argc, char *argv[]) if (oneshot) goto end; + /* Check if WDIOF_KEEPALIVEPING is supported */ + if (!(info.options & WDIOF_KEEPALIVEPING)) { + printf("WDIOC_KEEPALIVE not supported by this device\n"); + goto end; + } + printf("Watchdog Ticking Away!\n"); /* diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 0a5381717e9f..936b18be07cf 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -20,9 +20,10 @@ CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP_NF_IPTABLES_LEGACY=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y @@ -48,7 +49,6 @@ CONFIG_JUMP_LABEL=y CONFIG_FUTEX=y CONFIG_SHMEM=y CONFIG_SLUB=y -CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_SMP=y CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index 110b34834a6f..82921c75681c 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -14,7 +14,6 @@ Statistics for individual zram devices are exported through sysfs nodes at Kconfig required: CONFIG_ZRAM=y CONFIG_CRYPTO_LZ4=y -CONFIG_ZPOOL=y CONFIG_ZSMALLOC=y ZRAM Testcases diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 0f97fb0d19e1..8c7257155958 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -16,21 +16,6 @@ int nr_allocated; int preempt_count; int test_verbose; -struct kmem_cache { - pthread_mutex_t lock; - unsigned int size; - unsigned int align; - int nr_objs; - void *objs; - void (*ctor)(void *); - unsigned int non_kernel; - unsigned long nr_allocated; - unsigned long nr_tallocated; - bool exec_callback; - void (*callback)(void *); - void *private; -}; - void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *)) { cachep->callback = callback; @@ -79,7 +64,8 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, if (!(gfp & __GFP_DIRECT_RECLAIM)) { if (!cachep->non_kernel) { - cachep->exec_callback = true; + if (cachep->callback) + cachep->exec_callback = true; return NULL; } @@ -152,6 +138,12 @@ void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) if (kmalloc_verbose) pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); + if (cachep->exec_callback) { + if (cachep->callback) + cachep->callback(cachep->private); + cachep->exec_callback = false; + } + pthread_mutex_lock(&cachep->lock); for (int i = 0; i < size; i++) kmem_cache_free_locked(cachep, list[i]); @@ -219,6 +211,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, for (i = 0; i < size; i++) __kmem_cache_free_locked(cachep, p[i]); pthread_mutex_unlock(&cachep->lock); + if (cachep->callback) + cachep->exec_callback = true; return 0; } @@ -234,26 +228,112 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } struct kmem_cache * -kmem_cache_create(const char *name, unsigned int size, unsigned int align, - unsigned int flags, void (*ctor)(void *)) +__kmem_cache_create_args(const char *name, unsigned int size, + struct kmem_cache_args *args, + unsigned int flags) { struct kmem_cache *ret = malloc(sizeof(*ret)); pthread_mutex_init(&ret->lock, NULL); ret->size = size; - ret->align = align; + ret->align = args->align; + ret->sheaf_capacity = args->sheaf_capacity; ret->nr_objs = 0; ret->nr_allocated = 0; ret->nr_tallocated = 0; ret->objs = NULL; - ret->ctor = ctor; + ret->ctor = args->ctor; ret->non_kernel = 0; ret->exec_callback = false; ret->callback = NULL; ret->private = NULL; + return ret; } +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) +{ + struct slab_sheaf *sheaf; + unsigned int capacity; + + if (s->exec_callback) { + if (s->callback) + s->callback(s->private); + s->exec_callback = false; + } + + capacity = max(size, s->sheaf_capacity); + + sheaf = calloc(1, sizeof(*sheaf) + sizeof(void *) * capacity); + if (!sheaf) + return NULL; + + sheaf->cache = s; + sheaf->capacity = capacity; + sheaf->size = kmem_cache_alloc_bulk(s, gfp, size, sheaf->objects); + if (!sheaf->size) { + free(sheaf); + return NULL; + } + + return sheaf; +} + +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size) +{ + struct slab_sheaf *sheaf = *sheafp; + int refill; + + if (sheaf->size >= size) + return 0; + + if (size > sheaf->capacity) { + sheaf = kmem_cache_prefill_sheaf(s, gfp, size); + if (!sheaf) + return -ENOMEM; + + kmem_cache_return_sheaf(s, gfp, *sheafp); + *sheafp = sheaf; + return 0; + } + + refill = kmem_cache_alloc_bulk(s, gfp, size - sheaf->size, + &sheaf->objects[sheaf->size]); + if (!refill) + return -ENOMEM; + + sheaf->size += refill; + return 0; +} + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf) +{ + if (sheaf->size) + kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]); + + free(sheaf); +} + +void * +kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf) +{ + void *obj; + + if (sheaf->size == 0) { + printf("Nothing left in sheaf!\n"); + return NULL; + } + + obj = sheaf->objects[--sheaf->size]; + sheaf->objects[sheaf->size] = NULL; + + return obj; +} + /* * Test the test infrastructure for kem_cache_alloc/free and bulk counterparts. */ diff --git a/tools/testing/shared/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h index f67d47d32857..7d0fadef0f11 100644 --- a/tools/testing/shared/linux/maple_tree.h +++ b/tools/testing/shared/linux/maple_tree.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0+ */ -#define atomic_t int32_t -#define atomic_inc(x) uatomic_inc(x) -#define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) uatomic_set(x, y) +#include <linux/atomic.h> + #define U8_MAX UCHAR_MAX #include "../../../../include/linux/maple_tree.h" diff --git a/tools/testing/shared/maple-shared.h b/tools/testing/shared/maple-shared.h index dc4d30f3860b..2a1e9a8594a2 100644 --- a/tools/testing/shared/maple-shared.h +++ b/tools/testing/shared/maple-shared.h @@ -10,4 +10,15 @@ #include <time.h> #include "linux/init.h" +void maple_rcu_cb(struct rcu_head *head); +#define rcu_cb maple_rcu_cb + +#define kfree_rcu(_struct, _memb) \ +do { \ + typeof(_struct) _p_struct = (_struct); \ + \ + call_rcu(&((_p_struct)->_memb), rcu_cb); \ +} while(0); + + #endif /* __MAPLE_SHARED_H__ */ diff --git a/tools/testing/shared/maple-shim.c b/tools/testing/shared/maple-shim.c index 640df76f483e..16252ee616c0 100644 --- a/tools/testing/shared/maple-shim.c +++ b/tools/testing/shared/maple-shim.c @@ -3,5 +3,12 @@ /* Very simple shim around the maple tree. */ #include "maple-shared.h" +#include <linux/slab.h> #include "../../../lib/maple_tree.c" + +void maple_rcu_cb(struct rcu_head *head) { + struct maple_node *node = container_of(head, struct maple_node, rcu); + + kmem_cache_free(maple_node_cache, node); +} diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk index 923ee2492256..5bcdf26c8a9d 100644 --- a/tools/testing/shared/shared.mk +++ b/tools/testing/shared/shared.mk @@ -1,7 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.arch -CFLAGS += -I../shared -I. -I../../include -I../../../lib -g -Og -Wall \ +CFLAGS += -I../shared -I. -I../../include -I../../arch/$(SRCARCH)/include \ + -I../../../lib -g -Og -Wall \ -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined +CFLAGS += $(EXTRA_CFLAGS) LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS += -lpthread -lurcu LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o @@ -11,6 +14,7 @@ SHARED_DEPS = Makefile ../shared/shared.mk ../shared/*.h generated/map-shift.h \ generated/bit-length.h generated/autoconf.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ + ../../arch/$(SRCARCH)/include/asm/*.h \ ../../../include/linux/xarray.h \ ../../../include/linux/maple_tree.h \ ../../../include/linux/radix-tree.h \ diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h deleted file mode 100644 index 788c597c4fde..000000000000 --- a/tools/testing/vma/linux/atomic.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -#ifndef _LINUX_ATOMIC_H -#define _LINUX_ATOMIC_H - -#define atomic_t int32_t -#define atomic_inc(x) uatomic_inc(x) -#define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) uatomic_set(x, y) -#define U8_MAX UCHAR_MAX - -#ifndef atomic_cmpxchg_relaxed -#define atomic_cmpxchg_relaxed uatomic_cmpxchg -#define atomic_cmpxchg_release uatomic_cmpxchg -#endif /* atomic_cmpxchg_relaxed */ - -#endif /* _LINUX_ATOMIC_H */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 3639aa8dd2b0..dc976a285ad2 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -21,11 +21,13 @@ #include <stdlib.h> +#include <linux/atomic.h> #include <linux/list.h> #include <linux/maple_tree.h> #include <linux/mm.h> #include <linux/rbtree.h> #include <linux/refcount.h> +#include <linux/slab.h> extern unsigned long stack_guard_gap; #ifdef CONFIG_MMU @@ -249,6 +251,14 @@ struct mutex {}; #define DEFINE_MUTEX(mutexname) \ struct mutex mutexname = {} +#define DECLARE_BITMAP(name, bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +#define NUM_MM_FLAG_BITS (64) +typedef struct { + __private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); +} mm_flags_t; + struct mm_struct { struct maple_tree mm_mt; int map_count; /* number of VMAs */ @@ -260,7 +270,7 @@ struct mm_struct { unsigned long def_flags; - unsigned long flags; /* Must use atomic bitops to access */ + mm_flags_t flags; /* Must use mm_flags_* helpers to access */ }; struct vm_area_struct; @@ -274,13 +284,14 @@ struct vm_area_struct; */ struct vm_area_desc { /* Immutable state. */ - struct mm_struct *mm; + const struct mm_struct *const mm; + struct file *const file; /* May vary from vm_file in stacked callers. */ unsigned long start; unsigned long end; /* Mutable fields. Populated with initial state. */ pgoff_t pgoff; - struct file *file; + struct file *vm_file; vm_flags_t vm_flags; pgprot_t page_prot; @@ -467,13 +478,21 @@ struct vm_operations_struct { struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); #endif +#ifdef CONFIG_FIND_NORMAL_PAGE /* - * Called by vm_normal_page() for special PTEs to find the - * page for @addr. This is useful if the default behavior - * (using pte_page()) would not find the correct page. + * Called by vm_normal_page() for special PTEs in @vma at @addr. This + * allows for returning a "normal" page from vm_normal_page() even + * though the PTE indicates that the "struct page" either does not exist + * or should not be touched: "special". + * + * Do not add new users: this really only works when a "normal" page + * was mapped, but then the PTE got changed to something weird (+ + * marked special) that would not make pte_pfn() identify the originally + * inserted page. */ - struct page *(*find_special_page)(struct vm_area_struct *vma, - unsigned long addr); + struct page *(*find_normal_page)(struct vm_area_struct *vma, + unsigned long addr); +#endif /* CONFIG_FIND_NORMAL_PAGE */ }; struct vm_unmapped_area_info { @@ -509,65 +528,6 @@ struct pagetable_move_control { .len_in = len_, \ } -struct kmem_cache_args { - /** - * @align: The required alignment for the objects. - * - * %0 means no specific alignment is requested. - */ - unsigned int align; - /** - * @useroffset: Usercopy region offset. - * - * %0 is a valid offset, when @usersize is non-%0 - */ - unsigned int useroffset; - /** - * @usersize: Usercopy region size. - * - * %0 means no usercopy region is specified. - */ - unsigned int usersize; - /** - * @freeptr_offset: Custom offset for the free pointer - * in &SLAB_TYPESAFE_BY_RCU caches - * - * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer - * outside of the object. This might cause the object to grow in size. - * Cache creators that have a reason to avoid this can specify a custom - * free pointer offset in their struct where the free pointer will be - * placed. - * - * Note that placing the free pointer inside the object requires the - * caller to ensure that no fields are invalidated that are required to - * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for - * details). - * - * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset - * is specified, %use_freeptr_offset must be set %true. - * - * Note that @ctor currently isn't supported with custom free pointers - * as a @ctor requires an external free pointer. - */ - unsigned int freeptr_offset; - /** - * @use_freeptr_offset: Whether a @freeptr_offset is used. - */ - bool use_freeptr_offset; - /** - * @ctor: A constructor for the objects. - * - * The constructor is invoked for each object in a newly allocated slab - * page. It is the cache user's responsibility to free object in the - * same state as after calling the constructor, or deal appropriately - * with any differences between a freshly constructed and a reallocated - * object. - * - * %NULL means no constructor. - */ - void (*ctor)(void *); -}; - static inline void vma_iter_invalidate(struct vma_iterator *vmi) { mas_pause(&vmi->mas); @@ -652,40 +612,6 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_lock_seq = UINT_MAX; } -struct kmem_cache { - const char *name; - size_t object_size; - struct kmem_cache_args *args; -}; - -static inline struct kmem_cache *__kmem_cache_create(const char *name, - size_t object_size, - struct kmem_cache_args *args) -{ - struct kmem_cache *ret = malloc(sizeof(struct kmem_cache)); - - ret->name = name; - ret->object_size = object_size; - ret->args = args; - - return ret; -} - -#define kmem_cache_create(__name, __object_size, __args, ...) \ - __kmem_cache_create((__name), (__object_size), (__args)) - -static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) -{ - (void)gfpflags; - - return calloc(s->object_size, 1); -} - -static inline void kmem_cache_free(struct kmem_cache *s, void *x) -{ - free(x); -} - /* * These are defined in vma.h, but sadly vm_stat_account() is referenced by * kernel/fork.c, so we have to these broadly available there, and temporarily @@ -842,11 +768,11 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } -static inline void fput(struct file *) +static inline void fput(struct file *file) { } -static inline void mpol_put(struct mempolicy *) +static inline void mpol_put(struct mempolicy *pol) { } @@ -854,15 +780,15 @@ static inline void lru_add_drain(void) { } -static inline void tlb_gather_mmu(struct mmu_gather *, struct mm_struct *) +static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) { } -static inline void update_hiwater_rss(struct mm_struct *) +static inline void update_hiwater_rss(struct mm_struct *mm) { } -static inline void update_hiwater_vm(struct mm_struct *) +static inline void update_hiwater_vm(struct mm_struct *mm) { } @@ -871,36 +797,23 @@ static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, unsigned long end_addr, unsigned long tree_end, bool mm_wr_locked) { - (void)tlb; - (void)mas; - (void)vma; - (void)start_addr; - (void)end_addr; - (void)tree_end; - (void)mm_wr_locked; } static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling, bool mm_wr_locked) { - (void)tlb; - (void)mas; - (void)vma; - (void)floor; - (void)ceiling; - (void)mm_wr_locked; } -static inline void mapping_unmap_writable(struct address_space *) +static inline void mapping_unmap_writable(struct address_space *mapping) { } -static inline void flush_dcache_mmap_lock(struct address_space *) +static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } -static inline void tlb_finish_mmu(struct mmu_gather *) +static inline void tlb_finish_mmu(struct mmu_gather *tlb) { } @@ -909,7 +822,7 @@ static inline struct file *get_file(struct file *f) return f; } -static inline int vma_dup_policy(struct vm_area_struct *, struct vm_area_struct *) +static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { return 0; } @@ -936,10 +849,6 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long end, struct vm_area_struct *next) { - (void)vma; - (void)start; - (void)end; - (void)next; } static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} @@ -959,51 +868,48 @@ static inline void vm_acct_memory(long pages) { } -static inline void vma_interval_tree_insert(struct vm_area_struct *, - struct rb_root_cached *) +static inline void vma_interval_tree_insert(struct vm_area_struct *vma, + struct rb_root_cached *rb) { } -static inline void vma_interval_tree_remove(struct vm_area_struct *, - struct rb_root_cached *) +static inline void vma_interval_tree_remove(struct vm_area_struct *vma, + struct rb_root_cached *rb) { } -static inline void flush_dcache_mmap_unlock(struct address_space *) +static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } -static inline void anon_vma_interval_tree_insert(struct anon_vma_chain*, - struct rb_root_cached *) +static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc, + struct rb_root_cached *rb) { } -static inline void anon_vma_interval_tree_remove(struct anon_vma_chain*, - struct rb_root_cached *) +static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc, + struct rb_root_cached *rb) { } -static inline void uprobe_mmap(struct vm_area_struct *) +static inline void uprobe_mmap(struct vm_area_struct *vma) { } static inline void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - (void)vma; - (void)start; - (void)end; } -static inline void i_mmap_lock_write(struct address_space *) +static inline void i_mmap_lock_write(struct address_space *mapping) { } -static inline void anon_vma_lock_write(struct anon_vma *) +static inline void anon_vma_lock_write(struct anon_vma *anon_vma) { } -static inline void vma_assert_write_locked(struct vm_area_struct *) +static inline void vma_assert_write_locked(struct vm_area_struct *vma) { } @@ -1013,16 +919,16 @@ static inline void unlink_anon_vmas(struct vm_area_struct *vma) vma->anon_vma->was_unlinked = true; } -static inline void anon_vma_unlock_write(struct anon_vma *) +static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) { } -static inline void i_mmap_unlock_write(struct address_space *) +static inline void i_mmap_unlock_write(struct address_space *mapping) { } -static inline void anon_vma_merge(struct vm_area_struct *, - struct vm_area_struct *) +static inline void anon_vma_merge(struct vm_area_struct *vma, + struct vm_area_struct *next) { } @@ -1031,27 +937,22 @@ static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long end, struct list_head *unmaps) { - (void)vma; - (void)start; - (void)end; - (void)unmaps; - return 0; } -static inline void mmap_write_downgrade(struct mm_struct *) +static inline void mmap_write_downgrade(struct mm_struct *mm) { } -static inline void mmap_read_unlock(struct mm_struct *) +static inline void mmap_read_unlock(struct mm_struct *mm) { } -static inline void mmap_write_unlock(struct mm_struct *) +static inline void mmap_write_unlock(struct mm_struct *mm) { } -static inline int mmap_write_lock_killable(struct mm_struct *) +static inline int mmap_write_lock_killable(struct mm_struct *mm) { return 0; } @@ -1060,10 +961,6 @@ static inline bool can_modify_mm(struct mm_struct *mm, unsigned long start, unsigned long end) { - (void)mm; - (void)start; - (void)end; - return true; } @@ -1071,16 +968,13 @@ static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - (void)mm; - (void)start; - (void)end; } -static inline void mmap_assert_locked(struct mm_struct *) +static inline void mmap_assert_locked(struct mm_struct *mm) { } -static inline bool mpol_equal(struct mempolicy *, struct mempolicy *) +static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } @@ -1088,63 +982,62 @@ static inline bool mpol_equal(struct mempolicy *, struct mempolicy *) static inline void khugepaged_enter_vma(struct vm_area_struct *vma, vm_flags_t vm_flags) { - (void)vma; - (void)vm_flags; } -static inline bool mapping_can_writeback(struct address_space *) +static inline bool mapping_can_writeback(struct address_space *mapping) { return true; } -static inline bool is_vm_hugetlb_page(struct vm_area_struct *) +static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) { return false; } -static inline bool vma_soft_dirty_enabled(struct vm_area_struct *) +static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) { return false; } -static inline bool userfaultfd_wp(struct vm_area_struct *) +static inline bool userfaultfd_wp(struct vm_area_struct *vma) { return false; } -static inline void mmap_assert_write_locked(struct mm_struct *) +static inline void mmap_assert_write_locked(struct mm_struct *mm) { } -static inline void mutex_lock(struct mutex *) +static inline void mutex_lock(struct mutex *lock) { } -static inline void mutex_unlock(struct mutex *) +static inline void mutex_unlock(struct mutex *lock) { } -static inline bool mutex_is_locked(struct mutex *) +static inline bool mutex_is_locked(struct mutex *lock) { return true; } -static inline bool signal_pending(void *) +static inline bool signal_pending(void *p) { return false; } -static inline bool is_file_hugepages(struct file *) +static inline bool is_file_hugepages(struct file *file) { return false; } -static inline int security_vm_enough_memory_mm(struct mm_struct *, long) +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { return 0; } -static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long) +static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, + unsigned long npages) { return true; } @@ -1169,7 +1062,7 @@ static inline void vm_flags_clear(struct vm_area_struct *vma, vma->__vm_flags &= ~flags; } -static inline int shmem_zero_setup(struct vm_area_struct *) +static inline int shmem_zero_setup(struct vm_area_struct *vma) { return 0; } @@ -1179,20 +1072,20 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) vma->vm_ops = NULL; } -static inline void ksm_add_vma(struct vm_area_struct *) +static inline void ksm_add_vma(struct vm_area_struct *vma) { } -static inline void perf_event_mmap(struct vm_area_struct *) +static inline void perf_event_mmap(struct vm_area_struct *vma) { } -static inline bool vma_is_dax(struct vm_area_struct *) +static inline bool vma_is_dax(struct vm_area_struct *vma) { return false; } -static inline struct vm_area_struct *get_gate_vma(struct mm_struct *) +static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } @@ -1217,16 +1110,16 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) WRITE_ONCE(vma->vm_page_prot, vm_page_prot); } -static inline bool arch_validate_flags(vm_flags_t) +static inline bool arch_validate_flags(vm_flags_t flags) { return true; } -static inline void vma_close(struct vm_area_struct *) +static inline void vma_close(struct vm_area_struct *vma) { } -static inline int mmap_file(struct file *, struct vm_area_struct *) +static inline int mmap_file(struct file *file, struct vm_area_struct *vma) { return 0; } @@ -1282,8 +1175,8 @@ static inline bool capable(int cap) return true; } -static inline bool mlock_future_ok(struct mm_struct *mm, vm_flags_t vm_flags, - unsigned long bytes) +static inline bool mlock_future_ok(const struct mm_struct *mm, + vm_flags_t vm_flags, unsigned long bytes) { unsigned long locked_pages, limit_pages; @@ -1325,6 +1218,13 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } +# define ACCESS_PRIVATE(p, member) ((p)->member) + +static inline bool mm_flags_test(int flag, const struct mm_struct *mm) +{ + return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); +} + /* * Denies creating a writable executable mapping or gaining executable permissions. * @@ -1355,7 +1255,7 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, static inline bool map_deny_write_exec(unsigned long old, unsigned long new) { /* If MDWE is disabled, we have nothing to deny. */ - if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) + if (mm_flags_test(MMF_HAS_MDWE, current->mm)) return false; /* If the new VMA is not executable, we have nothing to deny. */ @@ -1375,21 +1275,12 @@ static inline bool map_deny_write_exec(unsigned long old, unsigned long new) static inline int mapping_map_writable(struct address_space *mapping) { - int c = atomic_read(&mapping->i_mmap_writable); - - /* Derived from the raw_atomic_inc_unless_negative() implementation. */ - do { - if (c < 0) - return -EPERM; - } while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1)); - - return 0; + return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? + 0 : -EPERM; } static inline unsigned long move_page_tables(struct pagetable_move_control *pmc) { - (void)pmc; - return 0; } @@ -1397,67 +1288,61 @@ static inline void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { - (void)tlb; - (void)addr; - (void)end; - (void)floor; - (void)ceiling; } static inline int ksm_execve(struct mm_struct *mm) { - (void)mm; - return 0; } static inline void ksm_exit(struct mm_struct *mm) { - (void)mm; } static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) { - (void)vma; - (void)reset_refcnt; + if (reset_refcnt) + refcount_set(&vma->vm_refcnt, 0); } static inline void vma_numab_state_init(struct vm_area_struct *vma) { - (void)vma; } static inline void vma_numab_state_free(struct vm_area_struct *vma) { - (void)vma; } static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, struct vm_area_struct *new_vma) { - (void)orig_vma; - (void)new_vma; } static inline void free_anon_vma_name(struct vm_area_struct *vma) { - (void)vma; } /* Declared in vma.h. */ static inline void set_vma_from_desc(struct vm_area_struct *vma, struct vm_area_desc *desc); -static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, - struct vm_area_desc *desc); - -static int compat_vma_mmap_prepare(struct file *file, - struct vm_area_struct *vma) +static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op, + struct file *file, struct vm_area_struct *vma) { - struct vm_area_desc desc; + struct vm_area_desc desc = { + .mm = vma->vm_mm, + .file = vma->vm_file, + .start = vma->vm_start, + .end = vma->vm_end, + + .pgoff = vma->vm_pgoff, + .vm_file = vma->vm_file, + .vm_flags = vma->vm_flags, + .page_prot = vma->vm_page_prot, + }; int err; - err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + err = f_op->mmap_prepare(&desc); if (err) return err; set_vma_from_desc(vma, &desc); @@ -1465,6 +1350,12 @@ static int compat_vma_mmap_prepare(struct file *file, return 0; } +static inline int compat_vma_mmap_prepare(struct file *file, + struct vm_area_struct *vma) +{ + return __compat_vma_mmap_prepare(file->f_op, file, vma); +} + /* Did the driver provide valid mmap hook configuration? */ static inline bool can_mmap_file(struct file *file) { @@ -1495,7 +1386,6 @@ static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) { - (void)vma; } static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) @@ -1506,13 +1396,13 @@ static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) fput(file); } -static inline bool shmem_file(struct file *) +static inline bool shmem_file(struct file *file) { return false; } -static inline vm_flags_t ksm_vma_flags(const struct mm_struct *, const struct file *, - vm_flags_t vm_flags) +static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, + const struct file *file, vm_flags_t vm_flags) { return vm_flags; } diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 7b861a8e997a..d843643ced6b 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -756,7 +756,6 @@ void setsockopt_ull_check(int fd, int level, int optname, fail: fprintf(stderr, "%s val %llu\n", errmsg, val); exit(EXIT_FAILURE); -; } /* Set "int" socket option and check that it's indeed set */ |