summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/vfio')
-rw-r--r--tools/testing/selftests/vfio/.gitignore10
-rw-r--r--tools/testing/selftests/vfio/Makefile29
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c416
l---------tools/testing/selftests/vfio/lib/drivers/dsa/registers.h1
l---------tools/testing/selftests/vfio/lib/drivers/ioat/hw.h1
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c235
l---------tools/testing/selftests/vfio/lib/drivers/ioat/registers.h1
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio.h26
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/assert.h54
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iommu.h76
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h23
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h125
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h97
-rw-r--r--tools/testing/selftests/vfio/lib/iommu.c465
-rw-r--r--tools/testing/selftests/vfio/lib/iova_allocator.c94
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.c78
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.mk29
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c378
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_driver.c112
-rwxr-xr-xtools/testing/selftests/vfio/scripts/cleanup.sh41
-rwxr-xr-xtools/testing/selftests/vfio/scripts/lib.sh42
-rwxr-xr-xtools/testing/selftests/vfio/scripts/run.sh16
-rwxr-xr-xtools/testing/selftests/vfio/scripts/setup.sh48
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c312
-rw-r--r--tools/testing/selftests/vfio/vfio_iommufd_setup_test.c127
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c168
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_test.c182
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_driver_test.c263
28 files changed, 3449 insertions, 0 deletions
diff --git a/tools/testing/selftests/vfio/.gitignore b/tools/testing/selftests/vfio/.gitignore
new file mode 100644
index 000000000000..7fadc19d3bca
--- /dev/null
+++ b/tools/testing/selftests/vfio/.gitignore
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*
+!/**/
+!*.c
+!*.h
+!*.S
+!*.sh
+!*.mk
+!.gitignore
+!Makefile
diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
new file mode 100644
index 000000000000..3c796ca99a50
--- /dev/null
+++ b/tools/testing/selftests/vfio/Makefile
@@ -0,0 +1,29 @@
+CFLAGS = $(KHDR_INCLUDES)
+TEST_GEN_PROGS += vfio_dma_mapping_test
+TEST_GEN_PROGS += vfio_iommufd_setup_test
+TEST_GEN_PROGS += vfio_pci_device_test
+TEST_GEN_PROGS += vfio_pci_device_init_perf_test
+TEST_GEN_PROGS += vfio_pci_driver_test
+
+TEST_FILES += scripts/cleanup.sh
+TEST_FILES += scripts/lib.sh
+TEST_FILES += scripts/run.sh
+TEST_FILES += scripts/setup.sh
+
+include ../lib.mk
+include lib/libvfio.mk
+
+CFLAGS += -I$(top_srcdir)/tools/include
+CFLAGS += -MD
+CFLAGS += $(EXTRA_CFLAGS)
+
+LDFLAGS += -pthread
+
+$(TEST_GEN_PROGS): %: %.o $(LIBVFIO_O)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< $(LIBVFIO_O) $(LDLIBS) -o $@
+
+TEST_GEN_PROGS_O = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O))
+-include $(TEST_DEP_FILES)
+
+EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES)
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
new file mode 100644
index 000000000000..c75045bcab79
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <unistd.h>
+
+#include <linux/bits.h>
+#include <linux/errno.h>
+#include <linux/idxd.h>
+#include <linux/io.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+
+#include <libvfio.h>
+
+#include "registers.h"
+
+/* Vectors 1+ are available for work queue completion interrupts. */
+#define MSIX_VECTOR 1
+
+struct dsa_state {
+ /* Descriptors for copy and batch operations. */
+ struct dsa_hw_desc batch[32];
+ struct dsa_hw_desc copy[1024];
+
+ /* Completion records for copy and batch operations. */
+ struct dsa_completion_record copy_completion;
+ struct dsa_completion_record batch_completion;
+
+ /* Cached device registers (and derived data) for easy access */
+ union gen_cap_reg gen_cap;
+ union wq_cap_reg wq_cap;
+ union group_cap_reg group_cap;
+ union engine_cap_reg engine_cap;
+ union offsets_reg table_offsets;
+ void *wqcfg_table;
+ void *grpcfg_table;
+ u64 max_batches;
+ u64 max_copies_per_batch;
+
+ /* The number of ongoing memcpy operations. */
+ u64 memcpy_count;
+
+ /* Buffers used by dsa_send_msi() to generate an interrupt */
+ u64 send_msi_src;
+ u64 send_msi_dst;
+};
+
+static inline struct dsa_state *to_dsa_state(struct vfio_pci_device *device)
+{
+ return device->driver.region.vaddr;
+}
+
+static bool dsa_int_handle_request_required(struct vfio_pci_device *device)
+{
+ void *bar0 = device->bars[0].vaddr;
+ union gen_cap_reg gen_cap;
+ u32 cmd_cap;
+
+ gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
+ if (!gen_cap.cmd_cap)
+ return false;
+
+ cmd_cap = readl(bar0 + IDXD_CMDCAP_OFFSET);
+ return (cmd_cap >> IDXD_CMD_REQUEST_INT_HANDLE) & 1;
+}
+
+static int dsa_probe(struct vfio_pci_device *device)
+{
+ if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_DSA_SPR0))
+ return -EINVAL;
+
+ if (dsa_int_handle_request_required(device)) {
+ dev_err(device, "Device requires requesting interrupt handles\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void dsa_check_sw_err(struct vfio_pci_device *device)
+{
+ void *reg = device->bars[0].vaddr + IDXD_SWERR_OFFSET;
+ union sw_err_reg err = {};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err.bits); i++) {
+ err.bits[i] = readq(reg + offsetof(union sw_err_reg, bits[i]));
+
+ /* No errors */
+ if (i == 0 && !err.valid)
+ return;
+ }
+
+ dev_err(device, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
+ err.bits[0], err.bits[1], err.bits[2], err.bits[3]);
+
+ dev_err(device, " valid: 0x%x\n", err.valid);
+ dev_err(device, " overflow: 0x%x\n", err.overflow);
+ dev_err(device, " desc_valid: 0x%x\n", err.desc_valid);
+ dev_err(device, " wq_idx_valid: 0x%x\n", err.wq_idx_valid);
+ dev_err(device, " batch: 0x%x\n", err.batch);
+ dev_err(device, " fault_rw: 0x%x\n", err.fault_rw);
+ dev_err(device, " priv: 0x%x\n", err.priv);
+ dev_err(device, " error: 0x%x\n", err.error);
+ dev_err(device, " wq_idx: 0x%x\n", err.wq_idx);
+ dev_err(device, " operation: 0x%x\n", err.operation);
+ dev_err(device, " pasid: 0x%x\n", err.pasid);
+ dev_err(device, " batch_idx: 0x%x\n", err.batch_idx);
+ dev_err(device, " invalid_flags: 0x%x\n", err.invalid_flags);
+ dev_err(device, " fault_addr: 0x%lx\n", err.fault_addr);
+
+ VFIO_FAIL("Software Error Detected!\n");
+}
+
+static void dsa_command(struct vfio_pci_device *device, u32 cmd)
+{
+ union idxd_command_reg cmd_reg = { .cmd = cmd };
+ u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
+ void *bar0 = device->bars[0].vaddr;
+ u32 status;
+ u8 err;
+
+ writel(cmd_reg.bits, bar0 + IDXD_CMD_OFFSET);
+
+ for (;;) {
+ dsa_check_sw_err(device);
+
+ status = readl(bar0 + IDXD_CMDSTS_OFFSET);
+ if (!(status & IDXD_CMDSTS_ACTIVE))
+ break;
+
+ VFIO_ASSERT_GT(--attempts, 0);
+ usleep(sleep_ms * 1000);
+ }
+
+ err = status & IDXD_CMDSTS_ERR_MASK;
+ VFIO_ASSERT_EQ(err, 0, "Error issuing command 0x%x: 0x%x\n", cmd, err);
+}
+
+static void dsa_wq_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ union wq_cap_reg wq_cap = dsa->wq_cap;
+ union wqcfg wqcfg;
+ u64 wqcfg_size;
+ int i;
+
+ VFIO_ASSERT_GT((u32)wq_cap.num_wqs, 0);
+
+ wqcfg = (union wqcfg) {
+ .wq_size = wq_cap.total_wq_size,
+ .mode = 1,
+ .priority = 1,
+ /*
+ * Disable Address Translation Service (if enabled) so that VFIO
+ * selftests using this driver can generate I/O page faults.
+ */
+ .wq_ats_disable = wq_cap.wq_ats_support,
+ .max_xfer_shift = dsa->gen_cap.max_xfer_shift,
+ .max_batch_shift = dsa->gen_cap.max_batch_shift,
+ .op_config[0] = BIT(DSA_OPCODE_MEMMOVE) | BIT(DSA_OPCODE_BATCH),
+ };
+
+ wqcfg_size = 1UL << (wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
+
+ for (i = 0; i < wqcfg_size / sizeof(wqcfg.bits[0]); i++)
+ writel(wqcfg.bits[i], dsa->wqcfg_table + offsetof(union wqcfg, bits[i]));
+}
+
+static void dsa_group_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ union group_cap_reg group_cap = dsa->group_cap;
+ union engine_cap_reg engine_cap = dsa->engine_cap;
+
+ VFIO_ASSERT_GT((u32)group_cap.num_groups, 0);
+ VFIO_ASSERT_GT((u32)engine_cap.num_engines, 0);
+
+ /* Assign work queue 0 and engine 0 to group 0 */
+ writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, wqs[0]));
+ writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, engines));
+}
+
+static void dsa_register_cache_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ void *bar0 = device->bars[0].vaddr;
+
+ dsa->gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
+ dsa->wq_cap.bits = readq(bar0 + IDXD_WQCAP_OFFSET);
+ dsa->group_cap.bits = readq(bar0 + IDXD_GRPCAP_OFFSET);
+ dsa->engine_cap.bits = readq(bar0 + IDXD_ENGCAP_OFFSET);
+
+ dsa->table_offsets.bits[0] = readq(bar0 + IDXD_TABLE_OFFSET);
+ dsa->table_offsets.bits[1] = readq(bar0 + IDXD_TABLE_OFFSET + 8);
+
+ dsa->wqcfg_table = bar0 + dsa->table_offsets.wqcfg * IDXD_TABLE_MULT;
+ dsa->grpcfg_table = bar0 + dsa->table_offsets.grpcfg * IDXD_TABLE_MULT;
+
+ dsa->max_batches = 1U << (dsa->wq_cap.total_wq_size + IDXD_WQCFG_MIN);
+ dsa->max_batches = min(dsa->max_batches, ARRAY_SIZE(dsa->batch));
+
+ dsa->max_copies_per_batch = 1UL << dsa->gen_cap.max_batch_shift;
+ dsa->max_copies_per_batch = min(dsa->max_copies_per_batch, ARRAY_SIZE(dsa->copy));
+}
+
+static void dsa_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ VFIO_ASSERT_GE(device->driver.region.size, sizeof(*dsa));
+
+ vfio_pci_config_writew(device, PCI_COMMAND,
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+
+ dsa_command(device, IDXD_CMD_RESET_DEVICE);
+
+ dsa_register_cache_init(device);
+ dsa_wq_init(device);
+ dsa_group_init(device);
+
+ dsa_command(device, IDXD_CMD_ENABLE_DEVICE);
+ dsa_command(device, IDXD_CMD_ENABLE_WQ);
+
+ vfio_pci_msix_enable(device, MSIX_VECTOR, 1);
+
+ device->driver.max_memcpy_count =
+ dsa->max_batches * dsa->max_copies_per_batch;
+ device->driver.max_memcpy_size = 1UL << dsa->gen_cap.max_xfer_shift;
+ device->driver.msi = MSIX_VECTOR;
+}
+
+static void dsa_remove(struct vfio_pci_device *device)
+{
+ dsa_command(device, IDXD_CMD_RESET_DEVICE);
+ vfio_pci_msix_disable(device);
+}
+
+static int dsa_completion_wait(struct vfio_pci_device *device,
+ struct dsa_completion_record *completion)
+{
+ u8 status;
+
+ for (;;) {
+ dsa_check_sw_err(device);
+
+ status = READ_ONCE(completion->status);
+ if (status)
+ break;
+
+ usleep(1000);
+ }
+
+ if (status == DSA_COMP_SUCCESS)
+ return 0;
+
+ dev_err(device, "Error detected during memcpy operation: 0x%x\n", status);
+ return -1;
+}
+
+static void dsa_copy_desc_init(struct vfio_pci_device *device,
+ struct dsa_hw_desc *desc,
+ iova_t src, iova_t dst, u64 size,
+ bool interrupt)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ u16 flags;
+
+ flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR;
+
+ if (interrupt)
+ flags |= IDXD_OP_FLAG_RCI;
+
+ *desc = (struct dsa_hw_desc) {
+ .opcode = DSA_OPCODE_MEMMOVE,
+ .flags = flags,
+ .priv = 1,
+ .src_addr = src,
+ .dst_addr = dst,
+ .xfer_size = size,
+ .completion_addr = to_iova(device, &dsa->copy_completion),
+ .int_handle = interrupt ? MSIX_VECTOR : 0,
+ };
+}
+
+static void dsa_batch_desc_init(struct vfio_pci_device *device,
+ struct dsa_hw_desc *desc,
+ u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ *desc = (struct dsa_hw_desc) {
+ .opcode = DSA_OPCODE_BATCH,
+ .flags = IDXD_OP_FLAG_CRAV,
+ .priv = 1,
+ .completion_addr = to_iova(device, &dsa->batch_completion),
+ .desc_list_addr = to_iova(device, &dsa->copy[0]),
+ .desc_count = count,
+ };
+}
+
+static void dsa_desc_write(struct vfio_pci_device *device, struct dsa_hw_desc *desc)
+{
+ /* Write the contents (not address) of the 64-byte descriptor to the device. */
+ iosubmit_cmds512(device->bars[2].vaddr, desc, 1);
+}
+
+static void dsa_memcpy_one(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, bool interrupt)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ memset(&dsa->copy_completion, 0, sizeof(dsa->copy_completion));
+
+ dsa_copy_desc_init(device, &dsa->copy[0], src, dst, size, interrupt);
+ dsa_desc_write(device, &dsa->copy[0]);
+}
+
+static void dsa_memcpy_batch(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ int i;
+
+ memset(&dsa->batch_completion, 0, sizeof(dsa->batch_completion));
+
+ for (i = 0; i < ARRAY_SIZE(dsa->copy); i++) {
+ struct dsa_hw_desc *copy_desc = &dsa->copy[i];
+
+ dsa_copy_desc_init(device, copy_desc, src, dst, size, false);
+
+ /* Don't request completions for individual copies. */
+ copy_desc->flags &= ~IDXD_OP_FLAG_RCR;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dsa->batch) && count; i++) {
+ struct dsa_hw_desc *batch_desc = &dsa->batch[i];
+ int nr_copies;
+
+ nr_copies = min(count, dsa->max_copies_per_batch);
+ count -= nr_copies;
+
+ /*
+ * Batches must have at least 2 copies, so handle the case where
+ * there is exactly 1 copy left by doing one less copy in this
+ * batch and then 2 in the next.
+ */
+ if (count == 1) {
+ nr_copies--;
+ count++;
+ }
+
+ dsa_batch_desc_init(device, batch_desc, nr_copies);
+
+ /* Request a completion for the last batch. */
+ if (!count)
+ batch_desc->flags |= IDXD_OP_FLAG_RCR;
+
+ dsa_desc_write(device, batch_desc);
+ }
+
+ VFIO_ASSERT_EQ(count, 0, "Failed to start %lu copies.\n", count);
+}
+
+static void dsa_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ /* DSA devices require at least 2 copies per batch. */
+ if (count == 1)
+ dsa_memcpy_one(device, src, dst, size, false);
+ else
+ dsa_memcpy_batch(device, src, dst, size, count);
+
+ dsa->memcpy_count = count;
+}
+
+static int dsa_memcpy_wait(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ int r;
+
+ if (dsa->memcpy_count == 1)
+ r = dsa_completion_wait(device, &dsa->copy_completion);
+ else
+ r = dsa_completion_wait(device, &dsa->batch_completion);
+
+ dsa->memcpy_count = 0;
+
+ return r;
+}
+
+static void dsa_send_msi(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ dsa_memcpy_one(device,
+ to_iova(device, &dsa->send_msi_src),
+ to_iova(device, &dsa->send_msi_dst),
+ sizeof(dsa->send_msi_src), true);
+
+ VFIO_ASSERT_EQ(dsa_completion_wait(device, &dsa->copy_completion), 0);
+}
+
+const struct vfio_pci_driver_ops dsa_ops = {
+ .name = "dsa",
+ .probe = dsa_probe,
+ .init = dsa_init,
+ .remove = dsa_remove,
+ .memcpy_start = dsa_memcpy_start,
+ .memcpy_wait = dsa_memcpy_wait,
+ .send_msi = dsa_send_msi,
+};
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h
new file mode 120000
index 000000000000..bde657c3c2af
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/idxd/registers.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h
new file mode 120000
index 000000000000..8ab52ddd4458
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/ioat/hw.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
new file mode 100644
index 000000000000..a871b935542b
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <unistd.h>
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+
+#include <libvfio.h>
+
+#include "hw.h"
+#include "registers.h"
+
+#define IOAT_DMACOUNT_MAX UINT16_MAX
+
+struct ioat_state {
+ /* Single descriptor used to issue DMA memcpy operations */
+ struct ioat_dma_descriptor desc;
+
+ /* Copy buffers used by ioat_send_msi() to generate an interrupt. */
+ u64 send_msi_src;
+ u64 send_msi_dst;
+};
+
+static inline struct ioat_state *to_ioat_state(struct vfio_pci_device *device)
+{
+ return device->driver.region.vaddr;
+}
+
+static inline void *ioat_channel_registers(struct vfio_pci_device *device)
+{
+ return device->bars[0].vaddr + IOAT_CHANNEL_MMIO_SIZE;
+}
+
+static int ioat_probe(struct vfio_pci_device *device)
+{
+ u8 version;
+ int r;
+
+ if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_IOAT_SKX))
+ return -EINVAL;
+
+ VFIO_ASSERT_NOT_NULL(device->bars[0].vaddr);
+
+ version = readb(device->bars[0].vaddr + IOAT_VER_OFFSET);
+ switch (version) {
+ case IOAT_VER_3_2:
+ case IOAT_VER_3_3:
+ r = 0;
+ break;
+ default:
+ dev_err(device, "ioat: Unsupported version: 0x%x\n", version);
+ r = -EINVAL;
+ }
+ return r;
+}
+
+static u64 ioat_channel_status(void *bar)
+{
+ return readq(bar + IOAT_CHANSTS_OFFSET) & IOAT_CHANSTS_STATUS;
+}
+
+static void ioat_clear_errors(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u32 errors;
+
+ errors = vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET);
+ vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors);
+
+ errors = vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+ vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors);
+
+ errors = readl(registers + IOAT_CHANERR_OFFSET);
+ writel(errors, registers + IOAT_CHANERR_OFFSET);
+}
+
+static void ioat_reset(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
+ u8 chancmd;
+
+ ioat_clear_errors(device);
+
+ writeb(IOAT_CHANCMD_RESET, registers + IOAT2_CHANCMD_OFFSET);
+
+ for (;;) {
+ chancmd = readb(registers + IOAT2_CHANCMD_OFFSET);
+ if (!(chancmd & IOAT_CHANCMD_RESET))
+ break;
+
+ VFIO_ASSERT_GT(--attempts, 0);
+ usleep(sleep_ms * 1000);
+ }
+
+ VFIO_ASSERT_EQ(ioat_channel_status(registers), IOAT_CHANSTS_HALTED);
+}
+
+static void ioat_init(struct vfio_pci_device *device)
+{
+ struct ioat_state *ioat = to_ioat_state(device);
+ u8 intrctrl;
+
+ VFIO_ASSERT_GE(device->driver.region.size, sizeof(*ioat));
+
+ vfio_pci_config_writew(device, PCI_COMMAND,
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+
+ ioat_reset(device);
+
+ /* Enable the use of MSI-X interrupts for channel interrupts. */
+ intrctrl = IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ writeb(intrctrl, device->bars[0].vaddr + IOAT_INTRCTRL_OFFSET);
+
+ vfio_pci_msix_enable(device, 0, device->msix_info.count);
+
+ device->driver.msi = 0;
+ device->driver.max_memcpy_size =
+ 1UL << readb(device->bars[0].vaddr + IOAT_XFERCAP_OFFSET);
+ device->driver.max_memcpy_count = IOAT_DMACOUNT_MAX;
+}
+
+static void ioat_remove(struct vfio_pci_device *device)
+{
+ ioat_reset(device);
+ vfio_pci_msix_disable(device);
+}
+
+static void ioat_handle_error(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+
+ dev_err(device, "Error detected during memcpy operation!\n"
+ " CHANERR: 0x%x\n"
+ " CHANERR_INT: 0x%x\n"
+ " DMAUNCERRSTS: 0x%x\n",
+ readl(registers + IOAT_CHANERR_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET));
+
+ ioat_reset(device);
+}
+
+static int ioat_memcpy_wait(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u64 status;
+ int r = 0;
+
+ /* Wait until all operations complete. */
+ for (;;) {
+ status = ioat_channel_status(registers);
+ if (status == IOAT_CHANSTS_DONE)
+ break;
+
+ if (status == IOAT_CHANSTS_HALTED) {
+ ioat_handle_error(device);
+ return -1;
+ }
+ }
+
+ /* Put the channel into the SUSPENDED state. */
+ writeb(IOAT_CHANCMD_SUSPEND, registers + IOAT2_CHANCMD_OFFSET);
+ for (;;) {
+ status = ioat_channel_status(registers);
+ if (status == IOAT_CHANSTS_SUSPENDED)
+ break;
+ }
+
+ return r;
+}
+
+static void __ioat_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u16 count, bool interrupt)
+{
+ void *registers = ioat_channel_registers(device);
+ struct ioat_state *ioat = to_ioat_state(device);
+ u64 desc_iova;
+ u16 chanctrl;
+
+ desc_iova = to_iova(device, &ioat->desc);
+ ioat->desc = (struct ioat_dma_descriptor) {
+ .ctl_f.op = IOAT_OP_COPY,
+ .ctl_f.int_en = interrupt,
+ .src_addr = src,
+ .dst_addr = dst,
+ .size = size,
+ .next = desc_iova,
+ };
+
+ /* Tell the device the address of the descriptor. */
+ writeq(desc_iova, registers + IOAT2_CHAINADDR_OFFSET);
+
+ /* (Re)Enable the channel interrupt and abort on any errors */
+ chanctrl = IOAT_CHANCTRL_INT_REARM | IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
+ writew(chanctrl, registers + IOAT_CHANCTRL_OFFSET);
+
+ /* Kick off @count DMA copy operation(s). */
+ writew(count, registers + IOAT_CHAN_DMACOUNT_OFFSET);
+}
+
+static void ioat_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count)
+{
+ __ioat_memcpy_start(device, src, dst, size, count, false);
+}
+
+static void ioat_send_msi(struct vfio_pci_device *device)
+{
+ struct ioat_state *ioat = to_ioat_state(device);
+
+ __ioat_memcpy_start(device,
+ to_iova(device, &ioat->send_msi_src),
+ to_iova(device, &ioat->send_msi_dst),
+ sizeof(ioat->send_msi_src), 1, true);
+
+ VFIO_ASSERT_EQ(ioat_memcpy_wait(device), 0);
+}
+
+const struct vfio_pci_driver_ops ioat_ops = {
+ .name = "ioat",
+ .probe = ioat_probe,
+ .init = ioat_init,
+ .remove = ioat_remove,
+ .memcpy_start = ioat_memcpy_start,
+ .memcpy_wait = ioat_memcpy_wait,
+ .send_msi = ioat_send_msi,
+};
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h
new file mode 120000
index 000000000000..0b809cfd8fe6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/ioat/registers.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio.h b/tools/testing/selftests/vfio/lib/include/libvfio.h
new file mode 100644
index 000000000000..279ddcd70194
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H
+
+#include <libvfio/assert.h>
+#include <libvfio/iommu.h>
+#include <libvfio/iova_allocator.h>
+#include <libvfio/vfio_pci_device.h>
+#include <libvfio/vfio_pci_driver.h>
+
+/*
+ * Return the BDF string of the device that the test should use.
+ *
+ * If a BDF string is provided by the user on the command line (as the last
+ * element of argv[]), then this function will return that and decrement argc
+ * by 1.
+ *
+ * Otherwise this function will attempt to use the environment variable
+ * $VFIO_SELFTESTS_BDF.
+ *
+ * If BDF cannot be determined then the test will exit with KSFT_SKIP.
+ */
+const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
+char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/assert.h b/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
new file mode 100644
index 000000000000..f4ebd122d9b6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "../../../../kselftest.h"
+
+#define VFIO_LOG_AND_EXIT(...) do { \
+ fprintf(stderr, " " __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ exit(KSFT_FAIL); \
+} while (0)
+
+#define VFIO_ASSERT_OP(_lhs, _rhs, _op, ...) do { \
+ typeof(_lhs) __lhs = (_lhs); \
+ typeof(_rhs) __rhs = (_rhs); \
+ \
+ if (__lhs _op __rhs) \
+ break; \
+ \
+ fprintf(stderr, "%s:%u: Assertion Failure\n\n", __FILE__, __LINE__); \
+ fprintf(stderr, " Expression: " #_lhs " " #_op " " #_rhs "\n"); \
+ fprintf(stderr, " Observed: %#lx %s %#lx\n", \
+ (u64)__lhs, #_op, (u64)__rhs); \
+ fprintf(stderr, " [errno: %d - %s]\n", errno, strerror(errno)); \
+ VFIO_LOG_AND_EXIT(__VA_ARGS__); \
+} while (0)
+
+#define VFIO_ASSERT_EQ(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, ==, ##__VA_ARGS__)
+#define VFIO_ASSERT_NE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, !=, ##__VA_ARGS__)
+#define VFIO_ASSERT_LT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <, ##__VA_ARGS__)
+#define VFIO_ASSERT_LE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <=, ##__VA_ARGS__)
+#define VFIO_ASSERT_GT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >, ##__VA_ARGS__)
+#define VFIO_ASSERT_GE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >=, ##__VA_ARGS__)
+#define VFIO_ASSERT_TRUE(_a, ...) VFIO_ASSERT_NE(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_FALSE(_a, ...) VFIO_ASSERT_EQ(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_NULL(_a, ...) VFIO_ASSERT_EQ(NULL, _a, ##__VA_ARGS__)
+#define VFIO_ASSERT_NOT_NULL(_a, ...) VFIO_ASSERT_NE(NULL, _a, ##__VA_ARGS__)
+
+#define VFIO_FAIL(_fmt, ...) do { \
+ fprintf(stderr, "%s:%u: FAIL\n\n", __FILE__, __LINE__); \
+ VFIO_LOG_AND_EXIT(_fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ioctl_assert(_fd, _op, _arg) do { \
+ void *__arg = (_arg); \
+ int __ret = ioctl((_fd), (_op), (__arg)); \
+ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
+} while (0)
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
new file mode 100644
index 000000000000..5c9b9dc6d993
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+#include <libvfio/assert.h>
+
+typedef u64 iova_t;
+
+struct iommu_mode {
+ const char *name;
+ const char *container_path;
+ unsigned long iommu_type;
+};
+
+extern const char *default_iommu_mode;
+
+struct dma_region {
+ struct list_head link;
+ void *vaddr;
+ iova_t iova;
+ u64 size;
+};
+
+struct iommu {
+ const struct iommu_mode *mode;
+ int container_fd;
+ int iommufd;
+ u32 ioas_id;
+ struct list_head dma_regions;
+};
+
+struct iommu *iommu_init(const char *iommu_mode);
+void iommu_cleanup(struct iommu *iommu);
+
+int __iommu_map(struct iommu *iommu, struct dma_region *region);
+
+static inline void iommu_map(struct iommu *iommu, struct dma_region *region)
+{
+ VFIO_ASSERT_EQ(__iommu_map(iommu, region), 0);
+}
+
+int __iommu_unmap(struct iommu *iommu, struct dma_region *region, u64 *unmapped);
+
+static inline void iommu_unmap(struct iommu *iommu, struct dma_region *region)
+{
+ VFIO_ASSERT_EQ(__iommu_unmap(iommu, region, NULL), 0);
+}
+
+int __iommu_unmap_all(struct iommu *iommu, u64 *unmapped);
+
+static inline void iommu_unmap_all(struct iommu *iommu)
+{
+ VFIO_ASSERT_EQ(__iommu_unmap_all(iommu, NULL), 0);
+}
+
+int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova);
+iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr);
+
+struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges);
+
+/*
+ * Generator for VFIO selftests fixture variants that replicate across all
+ * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
+ * which should then use FIXTURE_VARIANT_ADD() to create the variant.
+ */
+#define FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(...) \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__)
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h b/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
new file mode 100644
index 000000000000..8f1d994e9ea2
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H
+
+#include <uapi/linux/types.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/iommufd.h>
+
+#include <libvfio/iommu.h>
+
+struct iova_allocator {
+ struct iommu_iova_range *ranges;
+ u32 nranges;
+ u32 range_idx;
+ u64 range_offset;
+};
+
+struct iova_allocator *iova_allocator_init(struct iommu *iommu);
+void iova_allocator_cleanup(struct iova_allocator *allocator);
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
new file mode 100644
index 000000000000..2858885a89bb
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H
+
+#include <fcntl.h>
+#include <linux/vfio.h>
+#include <linux/pci_regs.h>
+
+#include <libvfio/assert.h>
+#include <libvfio/iommu.h>
+#include <libvfio/vfio_pci_driver.h>
+
+struct vfio_pci_bar {
+ struct vfio_region_info info;
+ void *vaddr;
+};
+
+struct vfio_pci_device {
+ const char *bdf;
+ int fd;
+ int group_fd;
+
+ struct iommu *iommu;
+
+ struct vfio_device_info info;
+ struct vfio_region_info config_space;
+ struct vfio_pci_bar bars[PCI_STD_NUM_BARS];
+
+ struct vfio_irq_info msi_info;
+ struct vfio_irq_info msix_info;
+
+ /* eventfds for MSI and MSI-x interrupts */
+ int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1];
+
+ struct vfio_pci_driver driver;
+};
+
+#define dev_info(_dev, _fmt, ...) printf("%s: " _fmt, (_dev)->bdf, ##__VA_ARGS__)
+#define dev_err(_dev, _fmt, ...) fprintf(stderr, "%s: " _fmt, (_dev)->bdf, ##__VA_ARGS__)
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu);
+void vfio_pci_device_cleanup(struct vfio_pci_device *device);
+
+void vfio_pci_device_reset(struct vfio_pci_device *device);
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data);
+
+#define vfio_pci_config_read(_device, _offset, _type) ({ \
+ _type __data; \
+ vfio_pci_config_access((_device), false, _offset, sizeof(__data), &__data); \
+ __data; \
+})
+
+#define vfio_pci_config_readb(_d, _o) vfio_pci_config_read(_d, _o, u8)
+#define vfio_pci_config_readw(_d, _o) vfio_pci_config_read(_d, _o, u16)
+#define vfio_pci_config_readl(_d, _o) vfio_pci_config_read(_d, _o, u32)
+
+#define vfio_pci_config_write(_device, _offset, _value, _type) do { \
+ _type __data = (_value); \
+ vfio_pci_config_access((_device), true, _offset, sizeof(_type), &__data); \
+} while (0)
+
+#define vfio_pci_config_writeb(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u8)
+#define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16)
+#define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32)
+
+void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index,
+ u32 vector, int count);
+void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index);
+void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector);
+
/*
 * Switch @fd to non-blocking mode, aborting the test on fcntl() failure.
 * Used so reads on IRQ eventfds fail fast instead of hanging when no
 * interrupt has fired.
 */
static inline void fcntl_set_nonblock(int fd)
{
	int r;

	r = fcntl(fd, F_GETFL, 0);
	VFIO_ASSERT_NE(r, -1, "F_GETFL failed for fd %d\n", fd);

	/* Preserve existing flags, only add O_NONBLOCK. */
	r = fcntl(fd, F_SETFL, r | O_NONBLOCK);
	VFIO_ASSERT_NE(r, -1, "F_SETFL O_NONBLOCK failed for fd %d\n", fd);
}

/* Enable @count MSI vectors starting at @vector. */
static inline void vfio_pci_msi_enable(struct vfio_pci_device *device,
				       u32 vector, int count)
{
	vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, vector, count);
}

/* Disable all MSI vectors on @device. */
static inline void vfio_pci_msi_disable(struct vfio_pci_device *device)
{
	vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX);
}

/* Enable @count MSI-X vectors starting at @vector. */
static inline void vfio_pci_msix_enable(struct vfio_pci_device *device,
					u32 vector, int count)
{
	vfio_pci_irq_enable(device, VFIO_PCI_MSIX_IRQ_INDEX, vector, count);
}

/* Disable all MSI-X vectors on @device. */
static inline void vfio_pci_msix_disable(struct vfio_pci_device *device)
{
	vfio_pci_irq_disable(device, VFIO_PCI_MSIX_IRQ_INDEX);
}

/*
 * Translate @vaddr to an IOVA via the device's IOMMU mappings.
 * Returns 0 and fills *@iova on success, -ENOENT if @vaddr is unmapped.
 */
static inline int __to_iova(struct vfio_pci_device *device, void *vaddr, iova_t *iova)
{
	return __iommu_hva2iova(device->iommu, vaddr, iova);
}

/* As __to_iova(), but asserts that @vaddr is mapped. */
static inline iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
{
	return iommu_hva2iova(device->iommu, vaddr);
}

/*
 * Check whether @device has the given PCI vendor/device ID by reading
 * the IDs from config space.
 */
static inline bool vfio_pci_device_match(struct vfio_pci_device *device,
					 u16 vendor_id, u16 device_id)
{
	return (vendor_id == vfio_pci_config_readw(device, PCI_VENDOR_ID)) &&
	       (device_id == vfio_pci_config_readw(device, PCI_DEVICE_ID));
}
+
+const char *vfio_pci_get_cdev_path(const char *bdf);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h
new file mode 100644
index 000000000000..e5ada209b1d1
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H
+
+#include <libvfio/iommu.h>
+
+struct vfio_pci_device;
+
+struct vfio_pci_driver_ops {
+ const char *name;
+
+ /**
+ * @probe() - Check if the driver supports the given device.
+ *
+ * Return: 0 on success, non-0 on failure.
+ */
+ int (*probe)(struct vfio_pci_device *device);
+
+ /**
+ * @init() - Initialize the driver for @device.
+ *
+ * Must be called after device->driver.region has been initialized.
+ */
+ void (*init)(struct vfio_pci_device *device);
+
+ /**
+ * remove() - Deinitialize the driver for @device.
+ */
+ void (*remove)(struct vfio_pci_device *device);
+
+ /**
+ * memcpy_start() - Kick off @count repeated memcpy operations from
+ * [@src, @src + @size) to [@dst, @dst + @size).
+ *
+ * Guarantees:
+ * - The device will attempt DMA reads on [src, src + size).
+ * - The device will attempt DMA writes on [dst, dst + size).
+ * - The device will not generate any interrupts.
+ *
+ * memcpy_start() returns immediately, it does not wait for the
+ * copies to complete.
+ */
+ void (*memcpy_start)(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count);
+
+ /**
+ * memcpy_wait() - Wait until the memcpy operations started by
+ * memcpy_start() have finished.
+ *
+ * Guarantees:
+ * - All in-flight DMAs initiated by memcpy_start() are fully complete
+ * before memcpy_wait() returns.
+ *
+ * Returns non-0 if the driver detects that an error occurred during the
+ * memcpy, 0 otherwise.
+ */
+ int (*memcpy_wait)(struct vfio_pci_device *device);
+
+ /**
+ * send_msi() - Make the device send the MSI device->driver.msi.
+ *
+ * Guarantees:
+ * - The device will send the MSI once.
+ */
+ void (*send_msi)(struct vfio_pci_device *device);
+};
+
+struct vfio_pci_driver {
+ const struct vfio_pci_driver_ops *ops;
+ bool initialized;
+ bool memcpy_in_progress;
+
+ /* Region to be used by the driver (e.g. for in-memory descriptors) */
+ struct dma_region region;
+
+ /* The maximum size that can be passed to memcpy_start(). */
+ u64 max_memcpy_size;
+
+ /* The maximum count that can be passed to memcpy_start(). */
+ u64 max_memcpy_count;
+
+ /* The MSI vector the device will signal in ops->send_msi(). */
+ int msi;
+};
+
+void vfio_pci_driver_probe(struct vfio_pci_device *device);
+void vfio_pci_driver_init(struct vfio_pci_device *device);
+void vfio_pci_driver_remove(struct vfio_pci_device *device);
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size);
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count);
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device);
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H */
diff --git a/tools/testing/selftests/vfio/lib/iommu.c b/tools/testing/selftests/vfio/lib/iommu.c
new file mode 100644
index 000000000000..8079d43523f3
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/iommu.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <uapi/linux/types.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "../../../kselftest.h"
+#include <libvfio.h>
+
+const char *default_iommu_mode = "iommufd";
+
+/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
+static const struct iommu_mode iommu_modes[] = {
+ {
+ .name = "vfio_type1_iommu",
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = "vfio_type1v2_iommu",
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = "iommufd_compat_type1",
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = "iommufd_compat_type1v2",
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = "iommufd",
+ },
+};
+
+static const struct iommu_mode *lookup_iommu_mode(const char *iommu_mode)
+{
+ int i;
+
+ if (!iommu_mode)
+ iommu_mode = default_iommu_mode;
+
+ for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
+ if (strcmp(iommu_mode, iommu_modes[i].name))
+ continue;
+
+ return &iommu_modes[i];
+ }
+
+ VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
+}
+
+int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova)
+{
+ struct dma_region *region;
+
+ list_for_each_entry(region, &iommu->dma_regions, link) {
+ if (vaddr < region->vaddr)
+ continue;
+
+ if (vaddr >= region->vaddr + region->size)
+ continue;
+
+ if (iova)
+ *iova = region->iova + (vaddr - region->vaddr);
+
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
/*
 * Translate @vaddr to its IOVA, asserting that @vaddr is covered by a
 * DMA mapping tracked on @iommu.
 */
iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr)
{
	iova_t iova;
	int ret;

	ret = __iommu_hva2iova(iommu, vaddr, &iova);
	VFIO_ASSERT_EQ(ret, 0, "%p is not mapped into the iommu\n", vaddr);

	return iova;
}

/*
 * Map @region read/write through the legacy VFIO type1 container.
 * Returns 0 on success, -errno from VFIO_IOMMU_MAP_DMA on failure.
 */
static int vfio_iommu_map(struct iommu *iommu, struct dma_region *region)
{
	struct vfio_iommu_type1_dma_map args = {
		.argsz = sizeof(args),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (u64)region->vaddr,
		.iova = region->iova,
		.size = region->size,
	};

	if (ioctl(iommu->container_fd, VFIO_IOMMU_MAP_DMA, &args))
		return -errno;

	return 0;
}

/*
 * Map @region read/write through iommufd at a fixed IOVA in the IOAS.
 * Returns 0 on success, -errno from IOMMU_IOAS_MAP on failure.
 */
static int iommufd_map(struct iommu *iommu, struct dma_region *region)
{
	struct iommu_ioas_map args = {
		.size = sizeof(args),
		.flags = IOMMU_IOAS_MAP_READABLE |
			 IOMMU_IOAS_MAP_WRITEABLE |
			 IOMMU_IOAS_MAP_FIXED_IOVA,
		.user_va = (u64)region->vaddr,
		.iova = region->iova,
		.length = region->size,
		.ioas_id = iommu->ioas_id,
	};

	if (ioctl(iommu->iommufd, IOMMU_IOAS_MAP, &args))
		return -errno;

	return 0;
}
+
+int __iommu_map(struct iommu *iommu, struct dma_region *region)
+{
+ int ret;
+
+ if (iommu->iommufd)
+ ret = iommufd_map(iommu, region);
+ else
+ ret = vfio_iommu_map(iommu, region);
+
+ if (ret)
+ return ret;
+
+ list_add(&region->link, &iommu->dma_regions);
+
+ return 0;
+}
+
/*
 * Low-level VFIO type1 unmap. On success, *@unmapped (if non-NULL)
 * receives the number of bytes the kernel reports it unmapped.
 * Returns 0 on success, -errno on failure.
 */
static int __vfio_iommu_unmap(int fd, u64 iova, u64 size, u32 flags, u64 *unmapped)
{
	struct vfio_iommu_type1_dma_unmap args = {
		.argsz = sizeof(args),
		.iova = iova,
		.size = size,
		.flags = flags,
	};

	if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
		return -errno;

	/* The kernel writes the actually-unmapped byte count back into args. */
	if (unmapped)
		*unmapped = args.size;

	return 0;
}

/* Unmap exactly @region from a legacy VFIO type1 container. */
static int vfio_iommu_unmap(struct iommu *iommu, struct dma_region *region,
			    u64 *unmapped)
{
	return __vfio_iommu_unmap(iommu->container_fd, region->iova,
				  region->size, 0, unmapped);
}

/*
 * Low-level iommufd unmap of [@iova, @iova + @length) from @ioas_id.
 * Returns 0 on success, -errno on failure; *@unmapped receives the
 * unmapped byte count on success (if non-NULL).
 */
static int __iommufd_unmap(int fd, u64 iova, u64 length, u32 ioas_id, u64 *unmapped)
{
	struct iommu_ioas_unmap args = {
		.size = sizeof(args),
		.iova = iova,
		.length = length,
		.ioas_id = ioas_id,
	};

	if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.length;

	return 0;
}

/* Unmap exactly @region from the iommufd IOAS. */
static int iommufd_unmap(struct iommu *iommu, struct dma_region *region,
			 u64 *unmapped)
{
	return __iommufd_unmap(iommu->iommufd, region->iova, region->size,
			       iommu->ioas_id, unmapped);
}
+
+int __iommu_unmap(struct iommu *iommu, struct dma_region *region, u64 *unmapped)
+{
+ int ret;
+
+ if (iommu->iommufd)
+ ret = iommufd_unmap(iommu, region, unmapped);
+ else
+ ret = vfio_iommu_unmap(iommu, region, unmapped);
+
+ if (ret)
+ return ret;
+
+ list_del_init(&region->link);
+
+ return 0;
+}
+
/*
 * Unmap everything in one syscall: the whole 64-bit IOVA space for
 * iommufd, or VFIO_DMA_UNMAP_FLAG_ALL for a type1 container. On success
 * the tracking list is emptied; *@unmapped gets the total byte count.
 */
int __iommu_unmap_all(struct iommu *iommu, u64 *unmapped)
{
	int ret;
	struct dma_region *curr, *next;

	if (iommu->iommufd)
		ret = __iommufd_unmap(iommu->iommufd, 0, UINT64_MAX,
				      iommu->ioas_id, unmapped);
	else
		ret = __vfio_iommu_unmap(iommu->container_fd, 0, 0,
					 VFIO_DMA_UNMAP_FLAG_ALL, unmapped);

	if (ret)
		return ret;

	/* _safe variant: each node is unlinked while we walk the list. */
	list_for_each_entry_safe(curr, next, &iommu->dma_regions, link)
		list_del_init(&curr->link);

	return 0;
}
+
/*
 * Return the capability header at *@cap_offset within @buf and advance
 * *@cap_offset to the next header, or NULL when the chain terminates
 * (offset 0). Asserts the header lies fully inside the @bufsz buffer.
 */
static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
						 u32 *cap_offset)
{
	struct vfio_info_cap_header *hdr;

	if (!*cap_offset)
		return NULL;

	VFIO_ASSERT_LT(*cap_offset, bufsz);
	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));

	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
	*cap_offset = hdr->next;

	return hdr;
}

/*
 * Walk the capability chain of a VFIO_IOMMU_GET_INFO result looking for
 * @cap_id. Returns the matching header or NULL. Defends against a
 * malformed chain: offsets must land inside info->argsz, and the walk is
 * bounded by the maximum number of headers that can fit, so a cycle
 * trips an assertion instead of looping forever.
 */
static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
							    u16 cap_id)
{
	struct vfio_info_cap_header *hdr;
	u32 cap_offset = info->cap_offset;
	u32 max_depth;
	u32 depth = 0;

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
		return NULL;

	/* The chain must start past the fixed struct, never inside it. */
	if (cap_offset)
		VFIO_ASSERT_GE(cap_offset, sizeof(*info));

	max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);

	while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
		depth++;
		VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");

		if (hdr->id == cap_id)
			return hdr;
	}

	return NULL;
}
+
+/* Return buffer including capability chain, if present. Free with free() */
+static struct vfio_iommu_type1_info *vfio_iommu_get_info(int container_fd)
+{
+ struct vfio_iommu_type1_info *info;
+
+ info = malloc(sizeof(*info));
+ VFIO_ASSERT_NOT_NULL(info);
+
+ *info = (struct vfio_iommu_type1_info) {
+ .argsz = sizeof(*info),
+ };
+
+ ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
+ VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+ info = realloc(info, info->argsz);
+ VFIO_ASSERT_NOT_NULL(info);
+
+ ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
+ VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+ return info;
+}
+
+/*
+ * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
+ * report iommufd's iommu_iova_range. Free with free().
+ */
/*
 * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
 * report iommufd's iommu_iova_range. Free with free().
 */
static struct iommu_iova_range *vfio_iommu_iova_ranges(struct iommu *iommu,
						       u32 *nranges)
{
	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;
	struct iommu_iova_range *ranges = NULL;

	info = vfio_iommu_get_info(iommu->container_fd);
	hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
	VFIO_ASSERT_NOT_NULL(hdr);

	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
	VFIO_ASSERT_GT(cap_range->nr_iovas, 0);

	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	/* type1 reports inclusive [start, end]; iommufd calls it "last". */
	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
		ranges[i] = (struct iommu_iova_range){
			.start = cap_range->iova_ranges[i].start,
			.last = cap_range->iova_ranges[i].end,
		};
	}

	*nranges = cap_range->nr_iovas;

	free(info);
	return ranges;
}

/* Return iova ranges of the device's IOAS. Free with free() */
static struct iommu_iova_range *iommufd_iova_ranges(struct iommu *iommu,
						    u32 *nranges)
{
	struct iommu_iova_range *ranges;
	int ret;

	struct iommu_ioas_iova_ranges query = {
		.size = sizeof(query),
		.ioas_id = iommu->ioas_id,
	};

	/*
	 * First call with no output buffer is expected to fail with
	 * EMSGSIZE while filling in query.num_iovas with the required
	 * array length.
	 */
	ret = ioctl(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	VFIO_ASSERT_EQ(ret, -1);
	VFIO_ASSERT_EQ(errno, EMSGSIZE);
	VFIO_ASSERT_GT(query.num_iovas, 0);

	ranges = calloc(query.num_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	query.allowed_iovas = (uintptr_t)ranges;

	/* Second call with a correctly sized buffer must succeed. */
	ioctl_assert(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	*nranges = query.num_iovas;

	return ranges;
}
+
+static int iova_range_comp(const void *a, const void *b)
+{
+ const struct iommu_iova_range *ra = a, *rb = b;
+
+ if (ra->start < rb->start)
+ return -1;
+
+ if (ra->start > rb->start)
+ return 1;
+
+ return 0;
+}
+
+/* Return sorted IOVA ranges of the device. Free with free(). */
+struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges)
+{
+ struct iommu_iova_range *ranges;
+
+ if (iommu->iommufd)
+ ranges = iommufd_iova_ranges(iommu, nranges);
+ else
+ ranges = vfio_iommu_iova_ranges(iommu, nranges);
+
+ if (!ranges)
+ return NULL;
+
+ VFIO_ASSERT_GT(*nranges, 0);
+
+ /* Sort and check that ranges are sane and non-overlapping */
+ qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
+ VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
+
+ for (u32 i = 1; i < *nranges; i++) {
+ VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
+ VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
+ }
+
+ return ranges;
+}
+
+static u32 iommufd_ioas_alloc(int iommufd)
+{
+ struct iommu_ioas_alloc args = {
+ .size = sizeof(args),
+ };
+
+ ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
+ return args.out_ioas_id;
+}
+
/*
 * Allocate and initialize an iommu for the given mode name (NULL selects
 * the default). Legacy modes open a VFIO container; iommufd mode opens
 * /dev/iommu and allocates an IOAS. Aborts on any failure.
 */
struct iommu *iommu_init(const char *iommu_mode)
{
	const char *container_path;
	struct iommu *iommu;
	int version;

	iommu = calloc(1, sizeof(*iommu));
	VFIO_ASSERT_NOT_NULL(iommu);

	INIT_LIST_HEAD(&iommu->dma_regions);

	iommu->mode = lookup_iommu_mode(iommu_mode);

	/* A container path distinguishes legacy VFIO modes from iommufd. */
	container_path = iommu->mode->container_path;
	if (container_path) {
		iommu->container_fd = open(container_path, O_RDWR);
		VFIO_ASSERT_GE(iommu->container_fd, 0, "open(%s) failed\n", container_path);

		version = ioctl(iommu->container_fd, VFIO_GET_API_VERSION);
		VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
	} else {
		/*
		 * Require device->iommufd to be >0 so that a simple non-0 check can be
		 * used to check if iommufd is enabled. In practice open() will never
		 * return 0 unless stdin is closed.
		 */
		iommu->iommufd = open("/dev/iommu", O_RDWR);
		VFIO_ASSERT_GT(iommu->iommufd, 0);

		iommu->ioas_id = iommufd_ioas_alloc(iommu->iommufd);
	}

	return iommu;
}
+
+void iommu_cleanup(struct iommu *iommu)
+{
+ if (iommu->iommufd)
+ VFIO_ASSERT_EQ(close(iommu->iommufd), 0);
+ else
+ VFIO_ASSERT_EQ(close(iommu->container_fd), 0);
+
+ free(iommu);
+}
diff --git a/tools/testing/selftests/vfio/lib/iova_allocator.c b/tools/testing/selftests/vfio/lib/iova_allocator.c
new file mode 100644
index 000000000000..a12b0a51e9e6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/iova_allocator.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/overflow.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
/*
 * Create an IOVA allocator seeded with the iommu's sorted, validated
 * IOVA ranges. The allocator hands out addresses linearly, starting at
 * the first range. Free with iova_allocator_cleanup().
 */
struct iova_allocator *iova_allocator_init(struct iommu *iommu)
{
	struct iova_allocator *allocator;
	struct iommu_iova_range *ranges;
	u32 nranges;

	ranges = iommu_iova_ranges(iommu, &nranges);
	VFIO_ASSERT_NOT_NULL(ranges);

	allocator = malloc(sizeof(*allocator));
	VFIO_ASSERT_NOT_NULL(allocator);

	/* Ownership of @ranges transfers to the allocator. */
	*allocator = (struct iova_allocator){
		.ranges = ranges,
		.nranges = nranges,
		.range_idx = 0,
		.range_offset = 0,
	};

	return allocator;
}

/* Release the range array and the allocator itself. */
void iova_allocator_cleanup(struct iova_allocator *allocator)
{
	free(allocator->ranges);
	free(allocator);
}
+
/*
 * Allocate a naturally-aligned IOVA region of @size bytes (size must be
 * a non-zero power of two). Allocations are linear and never freed;
 * running out of ranges aborts the test.
 */
iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
{
	VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
	VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");

	for (;;) {
		struct iommu_iova_range *range;
		iova_t iova, last;

		VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
			       "IOVA allocator out of space\n");

		range = &allocator->ranges[allocator->range_idx];
		iova = range->start + allocator->range_offset;

		/* Check for sufficient space at the current offset */
		if (check_add_overflow(iova, size - 1, &last) ||
		    last > range->last)
			goto next_range;

		/*
		 * Align iova to size: since last is in [iova, iova + size),
		 * rounding last down to a multiple of size yields the
		 * smallest size-aligned address >= iova.
		 */
		iova = last & ~(size - 1);

		/* Check for sufficient space at the aligned iova */
		if (check_add_overflow(iova, size - 1, &last) ||
		    last > range->last)
			goto next_range;

		/* Exactly exhausting the range advances to the next one. */
		if (last == range->last) {
			allocator->range_idx++;
			allocator->range_offset = 0;
		} else {
			allocator->range_offset = last - range->start + 1;
		}

		return iova;

next_range:
		allocator->range_idx++;
		allocator->range_offset = 0;
	}
}
+
diff --git a/tools/testing/selftests/vfio/lib/libvfio.c b/tools/testing/selftests/vfio/lib/libvfio.c
new file mode 100644
index 000000000000..a23a3cc5be69
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/libvfio.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../../../kselftest.h"
+#include <libvfio.h>
+
/*
 * Check whether @str is a full PCI address of the form
 * "segment:bus:device.function" (all fields hexadecimal) with no
 * trailing characters.
 */
static bool is_bdf(const char *str)
{
	unsigned int segment, bus, device, function;
	int consumed = 0;

	if (sscanf(str, "%4x:%2x:%2x.%2x%n",
		   &segment, &bus, &device, &function, &consumed) != 4)
		return false;

	/* %n records how much was parsed; reject trailing garbage. */
	return (size_t)consumed == strlen(str);
}
+
/*
 * Find the trailing run of BDF arguments on the command line. Scans argv
 * backwards, so options must precede the device list. *@nr_bdfs receives
 * the count and *@argc is shrunk so callers see only the non-BDF args.
 * Returns a pointer into argv, or NULL if no trailing BDFs exist.
 */
static char **get_bdfs_cmdline(int *argc, char *argv[], int *nr_bdfs)
{
	int i;

	for (i = *argc - 1; i > 0 && is_bdf(argv[i]); i--)
		continue;

	/* i now indexes the last non-BDF arg; BDFs start right after it. */
	i++;
	*nr_bdfs = *argc - i;
	*argc -= *nr_bdfs;

	return *nr_bdfs ? &argv[i] : NULL;
}

/*
 * Return the BDF from the VFIO_SELFTESTS_BDF environment variable, or
 * NULL if unset. Aborts if the variable holds a malformed BDF.
 */
static char *get_bdf_env(void)
{
	char *bdf;

	bdf = getenv("VFIO_SELFTESTS_BDF");
	if (!bdf)
		return NULL;

	VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
	return bdf;
}
+
/*
 * Determine the device(s) a selftest should use: trailing BDF args on
 * the command line take priority, then VFIO_SELFTESTS_BDF. Exits with
 * KSFT_SKIP (with usage help) if neither source provides a device.
 */
char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs)
{
	/* static: the returned &env_bdf must outlive this call. */
	static char *env_bdf;
	char **bdfs;

	bdfs = get_bdfs_cmdline(argc, argv, nr_bdfs);
	if (bdfs)
		return bdfs;

	env_bdf = get_bdf_env();
	if (env_bdf) {
		*nr_bdfs = 1;
		return &env_bdf;
	}

	fprintf(stderr, "Unable to determine which device(s) to use, skipping test.\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "To pass the device address via environment variable:\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "  export VFIO_SELFTESTS_BDF=\"segment:bus:device.function\"\n");
	fprintf(stderr, "  %s [options]\n", argv[0]);
	fprintf(stderr, "\n");
	fprintf(stderr, "To pass the device address(es) via argv:\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "  %s [options] segment:bus:device.function ...\n", argv[0]);
	fprintf(stderr, "\n");
	exit(KSFT_SKIP);
}

/* Convenience wrapper returning only the first BDF. */
const char *vfio_selftests_get_bdf(int *argc, char *argv[])
{
	int nr_bdfs;

	return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
}
diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk b/tools/testing/selftests/vfio/lib/libvfio.mk
new file mode 100644
index 000000000000..9f47bceed16f
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/libvfio.mk
@@ -0,0 +1,29 @@
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
+
+LIBVFIO_SRCDIR := $(selfdir)/vfio/lib
+
+LIBVFIO_C := iommu.c
+LIBVFIO_C += iova_allocator.c
+LIBVFIO_C += libvfio.c
+LIBVFIO_C += vfio_pci_device.c
+LIBVFIO_C += vfio_pci_driver.c
+
+ifeq ($(ARCH:x86_64=x86),x86)
+LIBVFIO_C += drivers/ioat/ioat.c
+LIBVFIO_C += drivers/dsa/dsa.c
+endif
+
+LIBVFIO_OUTPUT := $(OUTPUT)/libvfio
+
+LIBVFIO_O := $(patsubst %.c, $(LIBVFIO_OUTPUT)/%.o, $(LIBVFIO_C))
+
+LIBVFIO_O_DIRS := $(shell dirname $(LIBVFIO_O) | uniq)
+$(shell mkdir -p $(LIBVFIO_O_DIRS))
+
+CFLAGS += -I$(LIBVFIO_SRCDIR)/include
+
+$(LIBVFIO_O): $(LIBVFIO_OUTPUT)/%.o : $(LIBVFIO_SRCDIR)/%.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBVFIO_OUTPUT)
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
new file mode 100644
index 000000000000..8e34b9bfc96b
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/overflow.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+
+#include "kselftest.h"
+#include <libvfio.h>
+
+#define PCI_SYSFS_PATH "/sys/bus/pci/devices"
+
/*
 * Issue VFIO_DEVICE_SET_IRQS for @count vectors starting at @vector.
 * With @count > 0 the eventfds in @fds become the triggers
 * (DATA_EVENTFD); with @count == 0 the whole index is disabled
 * (DATA_NONE).
 */
static void vfio_pci_irq_set(struct vfio_pci_device *device,
			     u32 index, u32 vector, u32 count, int *fds)
{
	/* VLA: struct vfio_irq_set header plus one eventfd per vector. */
	u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {};
	struct vfio_irq_set *irq = (void *)&buf;
	int *irq_fds = (void *)&irq->data;

	irq->argsz = sizeof(buf);
	irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER;
	irq->index = index;
	irq->start = vector;
	irq->count = count;

	if (count) {
		irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD;
		memcpy(irq_fds, fds, sizeof(int) * count);
	} else {
		irq->flags |= VFIO_IRQ_SET_DATA_NONE;
	}

	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
}

/* Have the kernel fire IRQ @vector of @index once (loopback trigger). */
void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
{
	struct vfio_irq_set irq = {
		.argsz = sizeof(irq),
		.flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
		.index = index,
		.start = vector,
		.count = 1,
	};

	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
}

static void check_supported_irq_index(u32 index)
{
	/* VFIO selftests only supports MSI and MSI-x for now. */
	VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
			 index == VFIO_PCI_MSIX_IRQ_INDEX,
			 "Unsupported IRQ index: %u\n", index);
}

/*
 * Enable @count interrupts of @index starting at @vector, creating a
 * fresh eventfd per vector. Asserts each slot was previously unused.
 */
void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
			 int count)
{
	int i;

	check_supported_irq_index(index);

	for (i = vector; i < vector + count; i++) {
		VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
		device->msi_eventfds[i] = eventfd(0, 0);
		VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
	}

	vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
}

/*
 * Disable all interrupts of @index and close every live eventfd. The
 * eventfd array is shared between MSI and MSI-X; only one of the two is
 * expected to be enabled at a time, so draining the whole array is safe.
 */
void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
{
	int i;

	check_supported_irq_index(index);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
		if (device->msi_eventfds[i] < 0)
			continue;

		VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
		device->msi_eventfds[i] = -1;
	}

	vfio_pci_irq_set(device, index, 0, 0, NULL);
}

/* Query IRQ info (count, flags) for @index into *@irq_info. */
static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
			     struct vfio_irq_info *irq_info)
{
	irq_info->argsz = sizeof(*irq_info);
	irq_info->index = index;

	ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
}

/* Query region info (size, offset, flags) for region @index into *@info. */
static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
				struct vfio_region_info *info)
{
	memset(info, 0, sizeof(*info));

	info->argsz = sizeof(*info);
	info->index = index;

	ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
}
+
/*
 * mmap() BAR @index into the process, with protections derived from the
 * region's READ/WRITE flags. The BAR must advertise mmap support and
 * must not already be mapped.
 */
static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
{
	struct vfio_pci_bar *bar = &device->bars[index];
	int prot = 0;

	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
	VFIO_ASSERT_NULL(bar->vaddr);
	VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);

	if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
		prot |= PROT_READ;
	if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
		prot |= PROT_WRITE;

	/* The region's offset within the device fd selects the BAR. */
	bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
			  device->fd, bar->info.offset);
	VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
}

/* Undo vfio_pci_bar_map() for BAR @index; asserts it was mapped. */
static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
{
	struct vfio_pci_bar *bar = &device->bars[index];

	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
	VFIO_ASSERT_NOT_NULL(bar->vaddr);

	VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
	bar->vaddr = NULL;
}
+
+static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
+{
+ int i;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (device->bars[i].vaddr)
+ vfio_pci_bar_unmap(device, i);
+ }
+}
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data)
+{
+ struct vfio_region_info *config_space = &device->config_space;
+ int ret;
+
+ if (write)
+ ret = pwrite(device->fd, data, size, config_space->offset + config);
+ else
+ ret = pread(device->fd, data, size, config_space->offset + config);
+
+ VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
+ write ? "write to" : "read from", config);
+}
+
/* Issue a VFIO device reset; aborts if the device cannot be reset. */
void vfio_pci_device_reset(struct vfio_pci_device *device)
{
	ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
}
+
+static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
+{
+ char dev_iommu_group_path[PATH_MAX] = {0};
+ char sysfs_path[PATH_MAX] = {0};
+ unsigned int group;
+ int ret;
+
+ snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);
+
+ ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path));
+ VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);
+
+ ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
+ VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);
+
+ return group;
+}
+
/*
 * Open the device's VFIO group, verify it is viable (all devices in the
 * group bound to vfio drivers), and attach it to the iommu's container.
 */
static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
{
	struct vfio_group_status group_status = {
		.argsz = sizeof(group_status),
	};
	char group_path[32];
	int group;

	group = vfio_pci_get_group_from_dev(bdf);
	snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);

	device->group_fd = open(group_path, O_RDWR);
	VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);

	ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
	VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);

	ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->iommu->container_fd);
}

/*
 * Legacy (container-based) device setup: attach the group to the
 * container, set the IOMMU type, and obtain the device fd.
 */
static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
{
	struct iommu *iommu = device->iommu;
	unsigned long iommu_type = iommu->mode->iommu_type;
	int ret;

	vfio_pci_group_setup(device, bdf);

	ret = ioctl(iommu->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
	VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);

	/*
	 * Allow multiple threads to race to set the IOMMU type on the
	 * container. The first will succeed and the rest should fail
	 * because the IOMMU type is already set.
	 */
	(void)ioctl(iommu->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);

	device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
	VFIO_ASSERT_GE(device->fd, 0);
}
+
/*
 * Common post-open setup: query device/region/IRQ info, mmap() every
 * mappable BAR, and mark all MSI eventfd slots unused (-1).
 */
static void vfio_pci_device_setup(struct vfio_pci_device *device)
{
	int i;

	device->info.argsz = sizeof(device->info);
	ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);

	vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);

	/* Sanity check VFIO does not advertise mmap for config space */
	VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
			 "PCI config space should not support mmap()\n");

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		struct vfio_pci_bar *bar = device->bars + i;

		vfio_pci_region_get(device, i, &bar->info);
		if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
			vfio_pci_bar_map(device, i);
	}

	vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
	vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);

	/* -1 marks a vector's eventfd slot as unused. */
	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
		device->msi_eventfds[i] = -1;
}
+
/*
 * Locate the vfio cdev node for @bdf by scanning the device's
 * sysfs vfio-dev directory for the "vfioN" entry, and return
 * "/dev/vfio/devices/vfioN". Caller frees the returned string.
 */
const char *vfio_pci_get_cdev_path(const char *bdf)
{
	char dir_path[PATH_MAX];
	struct dirent *entry;
	char *cdev_path;
	DIR *dir;

	/* calloc keeps cdev_path[0] == 0 as the "not found" sentinel. */
	cdev_path = calloc(PATH_MAX, 1);
	VFIO_ASSERT_NOT_NULL(cdev_path);

	snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);

	dir = opendir(dir_path);
	VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);

	while ((entry = readdir(dir)) != NULL) {
		/* Find the file that starts with "vfio" */
		if (strncmp("vfio", entry->d_name, 4))
			continue;

		snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
		break;
	}

	VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
	VFIO_ASSERT_EQ(closedir(dir), 0);

	return cdev_path;
}
+
/* Bind the device fd to an iommufd instance (first step of cdev setup). */
static void vfio_device_bind_iommufd(int device_fd, int iommufd)
{
	struct vfio_device_bind_iommufd args = {
		.argsz = sizeof(args),
		.iommufd = iommufd,
	};

	ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
}

/* Attach the bound device to a page table object (here: an IOAS id). */
static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
{
	struct vfio_device_attach_iommufd_pt args = {
		.argsz = sizeof(args),
		.pt_id = pt_id,
	};

	ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
}

/*
 * cdev-based setup: open the device's /dev/vfio/devices node, bind it
 * to the iommufd, and attach it to the iommu's IOAS.
 */
static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
{
	const char *cdev_path = vfio_pci_get_cdev_path(bdf);

	device->fd = open(cdev_path, O_RDWR);
	VFIO_ASSERT_GE(device->fd, 0);
	free((void *)cdev_path);

	vfio_device_bind_iommufd(device->fd, device->iommu->iommufd);
	vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id);
}
+
/*
 * Open and initialize the VFIO PCI device @bdf against @iommu, choosing
 * the legacy container path or the iommufd cdev path based on the
 * iommu mode. Also probes for a matching selftest driver. Aborts on
 * failure; free with vfio_pci_device_cleanup().
 */
struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu)
{
	struct vfio_pci_device *device;

	device = calloc(1, sizeof(*device));
	VFIO_ASSERT_NOT_NULL(device);

	VFIO_ASSERT_NOT_NULL(iommu);
	device->iommu = iommu;
	device->bdf = bdf;

	/* Legacy modes have a container path; iommufd mode does not. */
	if (iommu->mode->container_path)
		vfio_pci_container_setup(device, bdf);
	else
		vfio_pci_iommufd_setup(device, bdf);

	vfio_pci_device_setup(device);
	vfio_pci_driver_probe(device);

	return device;
}
+
/*
 * Tear down a device created by vfio_pci_device_init(): remove the
 * driver (if initialized), unmap BARs, close the device fd, any live
 * MSI eventfds, and the group fd, then free the struct.
 */
void vfio_pci_device_cleanup(struct vfio_pci_device *device)
{
	int i;

	if (device->driver.initialized)
		vfio_pci_driver_remove(device);

	vfio_pci_bar_unmap_all(device);

	VFIO_ASSERT_EQ(close(device->fd), 0);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
		if (device->msi_eventfds[i] < 0)
			continue;

		VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
	}

	/* group_fd stays 0 (calloc) on the iommufd/cdev path. */
	if (device->group_fd)
		VFIO_ASSERT_EQ(close(device->group_fd), 0);

	free(device);
}
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
new file mode 100644
index 000000000000..6827f4a6febe
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "kselftest.h"
+#include <libvfio.h>
+
+#ifdef __x86_64__
+/* Hardware-specific backends; both are x86_64-only DMA engines. */
+extern struct vfio_pci_driver_ops dsa_ops;
+extern struct vfio_pci_driver_ops ioat_ops;
+#endif
+
+/* Registry of backends tried, in order, by vfio_pci_driver_probe(). */
+static struct vfio_pci_driver_ops *driver_ops[] = {
+#ifdef __x86_64__
+	&dsa_ops,
+	&ioat_ops,
+#endif
+};
+
+/*
+ * Probe each registered driver backend against @device and bind the first
+ * one that accepts it (ops->probe() returns 0 on match). Leaves
+ * device->driver.ops NULL if no backend matches. Must not be called on a
+ * device that already has a driver bound.
+ */
+void vfio_pci_driver_probe(struct vfio_pci_device *device)
+{
+	struct vfio_pci_driver_ops *ops;
+	int i;
+
+	VFIO_ASSERT_NULL(device->driver.ops);
+
+	for (i = 0; i < ARRAY_SIZE(driver_ops); i++) {
+		ops = driver_ops[i];
+
+		if (ops->probe(device))
+			continue;
+
+		device->driver.ops = ops;
+		/* First match wins; don't let a later probe overwrite it. */
+		break;
+	}
+}
+
+/*
+ * Sanity-check driver state before dispatching @op:
+ *  - a driver must be bound and must implement the op;
+ *  - init() requires an uninitialized driver, every other op requires an
+ *    initialized one;
+ *  - memcpy_wait() is valid only while a copy is in flight, and no other
+ *    op may run during one.
+ */
+static void vfio_check_driver_op(struct vfio_pci_driver *driver, void *op,
+				 const char *op_name)
+{
+	VFIO_ASSERT_NOT_NULL(driver->ops);
+	VFIO_ASSERT_NOT_NULL(op, "Driver has no %s()\n", op_name);
+	VFIO_ASSERT_EQ(driver->initialized, op != driver->ops->init);
+	VFIO_ASSERT_EQ(driver->memcpy_in_progress, op == driver->ops->memcpy_wait);
+}
+
+/* Convenience wrapper: look up ops->_op and pass its name for diagnostics. */
+#define VFIO_CHECK_DRIVER_OP(_driver, _op) do {				       \
+	struct vfio_pci_driver *__driver = (_driver);			       \
+	vfio_check_driver_op(__driver, __driver->ops->_op, #_op);	       \
+} while (0)
+
+void vfio_pci_driver_init(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_ASSERT_NOT_NULL(driver->region.vaddr);
+ VFIO_CHECK_DRIVER_OP(driver, init);
+
+ driver->ops->init(device);
+
+ driver->initialized = true;
+}
+
+void vfio_pci_driver_remove(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_CHECK_DRIVER_OP(driver, remove);
+
+ driver->ops->remove(device);
+ driver->initialized = false;
+}
+
+/* Ask the device to fire an MSI (for interrupt-delivery tests). */
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device)
+{
+	struct vfio_pci_driver *driver = &device->driver;
+
+	VFIO_CHECK_DRIVER_OP(driver, send_msi);
+
+	driver->ops->send_msi(device);
+}
+
+/*
+ * Kick off @count DMA copies of @size bytes from IOVA @src to IOVA @dst.
+ * Asynchronous: pair with vfio_pci_driver_memcpy_wait(). Sizes/counts are
+ * bounded by the backend-advertised maxima.
+ */
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+				  iova_t src, iova_t dst, u64 size,
+				  u64 count)
+{
+	struct vfio_pci_driver *driver = &device->driver;
+
+	VFIO_ASSERT_LE(size, driver->max_memcpy_size);
+	VFIO_ASSERT_LE(count, driver->max_memcpy_count);
+	VFIO_CHECK_DRIVER_OP(driver, memcpy_start);
+
+	driver->ops->memcpy_start(device, src, dst, size, count);
+	driver->memcpy_in_progress = true;
+}
+
+/*
+ * Wait for a copy started by vfio_pci_driver_memcpy_start() to finish.
+ * Returns the backend's result (0 on success). The in-progress flag is
+ * cleared even on failure so the driver can be used again.
+ */
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device)
+{
+	struct vfio_pci_driver *driver = &device->driver;
+	int r;
+
+	VFIO_CHECK_DRIVER_OP(driver, memcpy_wait);
+
+	r = driver->ops->memcpy_wait(device);
+	driver->memcpy_in_progress = false;
+
+	return r;
+}
+
+/* Synchronous single-copy convenience wrapper. Returns 0 on success. */
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+			   iova_t src, iova_t dst, u64 size)
+{
+	vfio_pci_driver_memcpy_start(device, src, dst, size, 1);
+
+	return vfio_pci_driver_memcpy_wait(device);
+}
diff --git a/tools/testing/selftests/vfio/scripts/cleanup.sh b/tools/testing/selftests/vfio/scripts/cleanup.sh
new file mode 100755
index 000000000000..69c922d8aafb
--- /dev/null
+++ b/tools/testing/selftests/vfio/scripts/cleanup.sh
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+source $(dirname -- "${BASH_SOURCE[0]}")/lib.sh
+
+# Restore each BDF given as an argument to its pre-setup.sh state, using the
+# marker/state files setup.sh recorded under ${DEVICES_DIR}/<bdf>:
+#   vfio-pci         -> device was bound to vfio-pci; unbind it
+#   driver_override  -> an override was set; clear it
+#   driver           -> contains the original driver name; rebind to it
+#   sriov_numvfs     -> contains the original VF count; restore it
+# The state directory is removed once the device is restored.
+function cleanup_devices() {
+	local device_bdf
+	local device_dir
+
+	for device_bdf in "$@"; do
+		device_dir=${DEVICES_DIR}/${device_bdf}
+
+		if [ -f ${device_dir}/vfio-pci ]; then
+			unbind ${device_bdf} vfio-pci
+		fi
+
+		if [ -f ${device_dir}/driver_override ]; then
+			clear_driver_override ${device_bdf}
+		fi
+
+		if [ -f ${device_dir}/driver ]; then
+			bind ${device_bdf} $(cat ${device_dir}/driver)
+		fi
+
+		if [ -f ${device_dir}/sriov_numvfs ]; then
+			set_sriov_numvfs ${device_bdf} $(cat ${device_dir}/sriov_numvfs)
+		fi
+
+		rm -rf ${device_dir}
+	done
+}
+
+# With no arguments, clean up every device recorded in ${DEVICES_DIR} and
+# remove the (then empty) state directory; otherwise clean up only the BDFs
+# passed on the command line.
+function main() {
+	if [ $# = 0 ]; then
+		cleanup_devices $(ls ${DEVICES_DIR})
+		rmdir ${DEVICES_DIR}
+	else
+		cleanup_devices "$@"
+	fi
+}
+
+main "$@"
diff --git a/tools/testing/selftests/vfio/scripts/lib.sh b/tools/testing/selftests/vfio/scripts/lib.sh
new file mode 100755
index 000000000000..9f05f29c7b86
--- /dev/null
+++ b/tools/testing/selftests/vfio/scripts/lib.sh
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+readonly DEVICES_DIR="${TMPDIR:-/tmp}/vfio-selftests-devices"
+
+# write_to <file> <value>: echo <value> into <file>, logging the command.
+function write_to() {
+	# Unfortunately set -x does not show redirects so use echo to manually
+	# tell the user what commands are being run.
+	echo "+ echo \"${2}\" > ${1}"
+	echo "${2}" > ${1}
+}
+
+# Print the name of the driver currently bound to PCI device ${1} (BDF),
+# or nothing if the device is unbound.
+function get_driver() {
+	if [ -L /sys/bus/pci/devices/${1}/driver ]; then
+		basename $(readlink -m /sys/bus/pci/devices/${1}/driver)
+	fi
+}
+
+# bind <bdf> <driver>: bind device <bdf> to <driver> via sysfs.
+function bind() {
+	write_to /sys/bus/pci/drivers/${2}/bind ${1}
+}
+
+# unbind <bdf> <driver>: unbind device <bdf> from <driver> via sysfs.
+function unbind() {
+	write_to /sys/bus/pci/drivers/${2}/unbind ${1}
+}
+
+# set_sriov_numvfs <bdf> <n>: set the number of SR-IOV VFs on <bdf>.
+function set_sriov_numvfs() {
+	write_to /sys/bus/pci/devices/${1}/sriov_numvfs ${2}
+}
+
+# Print the current VF count for <bdf>, or nothing if not SR-IOV capable.
+function get_sriov_numvfs() {
+	if [ -f /sys/bus/pci/devices/${1}/sriov_numvfs ]; then
+		cat /sys/bus/pci/devices/${1}/sriov_numvfs
+	fi
+}
+
+# set_driver_override <bdf> <driver>: force <bdf> to match only <driver>.
+function set_driver_override() {
+	write_to /sys/bus/pci/devices/${1}/driver_override ${2}
+}
+
+# Clear any driver_override on <bdf> (writes the empty string).
+function clear_driver_override() {
+	set_driver_override ${1} ""
+}
diff --git a/tools/testing/selftests/vfio/scripts/run.sh b/tools/testing/selftests/vfio/scripts/run.sh
new file mode 100755
index 000000000000..91fd38f9f6f6
--- /dev/null
+++ b/tools/testing/selftests/vfio/scripts/run.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+source $(dirname -- "${BASH_SOURCE[0]}")/lib.sh
+
+# Run the given command with every set-up device BDF appended as arguments.
+# Exits with 4 (KSFT_SKIP) when setup.sh has not recorded any devices.
+function main() {
+	local device_bdfs=$(ls ${DEVICES_DIR})
+
+	if [ -z "${device_bdfs}" ]; then
+		echo "No devices found, skipping."
+		exit 4
+	fi
+
+	# Intentionally unquoted: one argument per BDF.
+	"$@" ${device_bdfs}
+}
+
+main "$@"
diff --git a/tools/testing/selftests/vfio/scripts/setup.sh b/tools/testing/selftests/vfio/scripts/setup.sh
new file mode 100755
index 000000000000..49a499e51cbe
--- /dev/null
+++ b/tools/testing/selftests/vfio/scripts/setup.sh
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+set -e
+
+source $(dirname -- "${BASH_SOURCE[0]}")/lib.sh
+
+# Prepare each BDF argument for the selftests: disable SR-IOV VFs, unbind
+# the current driver, and bind to vfio-pci. The original state is recorded
+# under ${DEVICES_DIR}/<bdf> so cleanup.sh can restore it.
+function main() {
+	local device_bdf
+	local device_dir
+	local numvfs
+	local driver
+
+	if [ $# = 0 ]; then
+		echo "usage: $0 segment:bus:device.function ..." >&2
+		exit 1
+	fi
+
+	for device_bdf in "$@"; do
+		test -d /sys/bus/pci/devices/${device_bdf}
+
+		device_dir=${DEVICES_DIR}/${device_bdf}
+		if [ -d "${device_dir}" ]; then
+			# NOTE(review): exit 0 aborts the whole loop, leaving
+			# any remaining BDFs un-set-up — confirm this is the
+			# intended idempotency behavior (vs. 'continue').
+			echo "${device_bdf} has already been set up, exiting."
+			exit 0
+		fi
+
+		mkdir -p ${device_dir}
+
+		# Save and zero the VF count; VFs must be gone before unbind.
+		numvfs=$(get_sriov_numvfs ${device_bdf})
+		if [ "${numvfs}" ]; then
+			set_sriov_numvfs ${device_bdf} 0
+			echo ${numvfs} > ${device_dir}/sriov_numvfs
+		fi
+
+		# Save and detach the currently bound driver, if any.
+		driver=$(get_driver ${device_bdf})
+		if [ "${driver}" ]; then
+			unbind ${device_bdf} ${driver}
+			echo ${driver} > ${device_dir}/driver
+		fi
+
+		# Marker files tell cleanup.sh which steps to undo.
+		set_driver_override ${device_bdf} vfio-pci
+		touch ${device_dir}/driver_override
+
+		bind ${device_bdf} vfio-pci
+		touch ${device_dir}/vfio-pci
+	done
+}
+
+main "$@"
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
new file mode 100644
index 000000000000..16eba2ecca47
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+#include "kselftest_harness.h"
+
+static const char *device_bdf;
+
+/* One row of the Intel IOMMU debugfs page-table dump: PGD..PTE values. */
+struct iommu_mapping {
+	u64 pgd;
+	u64 p4d;
+	u64 pud;
+	u64 pmd;
+	u64 pte;
+};
+
+/*
+ * Consume the next whitespace/'|'-separated token from *line (strtok_r
+ * advances *line) and parse it as a hex value into *value. *value is left
+ * untouched when no token remains.
+ * NOTE(review): "0x%lx" with a u64* assumes u64 == unsigned long (LP64);
+ * confirm against the tools' u64 typedef.
+ */
+static void parse_next_value(char **line, u64 *value)
+{
+	char *token;
+
+	token = strtok_r(*line, " \t|\n", line);
+	if (!token)
+		return;
+
+	/* Caller verifies `value`. No need to check return value. */
+	sscanf(token, "0x%lx", value);
+}
+
+/*
+ * Look up @iova in the Intel IOMMU debugfs translation dump for device
+ * @bdf and fill *mapping with the page-table entries of the matching row.
+ * Returns 0 on success, -ENOENT if the IOVA has no row. Asserts if the
+ * debugfs file cannot be opened.
+ */
+static int intel_iommu_mapping_get(const char *bdf, u64 iova,
+				   struct iommu_mapping *mapping)
+{
+	char iommu_mapping_path[PATH_MAX], line[PATH_MAX];
+	u64 line_iova = -1;
+	int ret = -ENOENT;
+	FILE *file;
+	char *rest;
+
+	snprintf(iommu_mapping_path, sizeof(iommu_mapping_path),
+		 "/sys/kernel/debug/iommu/intel/%s/domain_translation_struct",
+		 bdf);
+
+	printf("Searching for IOVA 0x%lx in %s\n", iova, iommu_mapping_path);
+
+	file = fopen(iommu_mapping_path, "r");
+	VFIO_ASSERT_NOT_NULL(file, "fopen(%s) failed", iommu_mapping_path);
+
+	while (fgets(line, sizeof(line), file)) {
+		rest = line;
+
+		/* First column is the IOVA expressed in page frames. */
+		parse_next_value(&rest, &line_iova);
+		if (line_iova != (iova / getpagesize()))
+			continue;
+
+		/*
+		 * Ensure each struct field is initialized in case of empty
+		 * page table values.
+		 */
+		memset(mapping, 0, sizeof(*mapping));
+		parse_next_value(&rest, &mapping->pgd);
+		parse_next_value(&rest, &mapping->p4d);
+		parse_next_value(&rest, &mapping->pud);
+		parse_next_value(&rest, &mapping->pmd);
+		parse_next_value(&rest, &mapping->pte);
+
+		ret = 0;
+		break;
+	}
+
+	fclose(file);
+
+	if (ret)
+		printf("IOVA not found\n");
+
+	return ret;
+}
+
+/*
+ * Vendor-dispatching front end for reading an IOMMU mapping: returns
+ * -EOPNOTSUPP unless a supported vendor debugfs interface is present.
+ * Only the Intel IOMMU is supported so far.
+ */
+static int iommu_mapping_get(const char *bdf, u64 iova,
+			     struct iommu_mapping *mapping)
+{
+	if (access("/sys/kernel/debug/iommu/intel", F_OK) != 0)
+		return -EOPNOTSUPP;
+
+	return intel_iommu_mapping_get(bdf, iova, mapping);
+}
+
+/* Per-test state: an IOMMU domain, the device attached to it, and an IOVA
+ * allocator carved from the domain's usable ranges. */
+FIXTURE(vfio_dma_mapping_test) {
+	struct iommu *iommu;
+	struct vfio_pci_device *device;
+	struct iova_allocator *iova_allocator;
+};
+
+/* Each variant picks an IOMMU mode and the mmap() size/flags to test. */
+FIXTURE_VARIANT(vfio_dma_mapping_test) {
+	const char *iommu_mode;
+	u64 size;
+	int mmap_flags;
+};
+
+/* Instantiated once per IOMMU mode by FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES. */
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode, _name, _size, _mmap_flags) \
+FIXTURE_VARIANT_ADD(vfio_dma_mapping_test, _iommu_mode ## _ ## _name) {	       \
+	.iommu_mode = #_iommu_mode,					       \
+	.size = (_size),						       \
+	.mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE | (_mmap_flags),	       \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous, 0, 0);
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_2mb, SZ_2M, MAP_HUGETLB | MAP_HUGE_2MB);
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB);
+
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+FIXTURE_SETUP(vfio_dma_mapping_test)
+{
+	self->iommu = iommu_init(variant->iommu_mode);
+	self->device = vfio_pci_device_init(device_bdf, self->iommu);
+	self->iova_allocator = iova_allocator_init(self->iommu);
+}
+
+/* Teardown in reverse order of setup. */
+FIXTURE_TEARDOWN(vfio_dma_mapping_test)
+{
+	iova_allocator_cleanup(self->iova_allocator);
+	vfio_pci_device_cleanup(self->device);
+	iommu_cleanup(self->iommu);
+}
+
+/*
+ * Map an anonymous (optionally HugeTLB) buffer into the IOMMU, verify the
+ * resulting page-table entries via debugfs (where supported), then unmap
+ * and verify the translation is gone.
+ */
+TEST_F(vfio_dma_mapping_test, dma_map_unmap)
+{
+	const u64 size = variant->size ?: getpagesize();
+	const int flags = variant->mmap_flags;
+	struct dma_region region;
+	struct iommu_mapping mapping;
+	u64 mapping_size = size;
+	u64 unmapped;
+	int rc;
+
+	region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+
+	/* Skip the test if there aren't enough HugeTLB pages available. */
+	if (flags & MAP_HUGETLB && region.vaddr == MAP_FAILED)
+		SKIP(return, "mmap() failed: %s (%d)\n", strerror(errno), errno);
+	else
+		ASSERT_NE(region.vaddr, MAP_FAILED);
+
+	region.iova = iova_allocator_alloc(self->iova_allocator, size);
+	region.size = size;
+
+	iommu_map(self->iommu, &region);
+	printf("Mapped HVA %p (size 0x%lx) at IOVA 0x%lx\n", region.vaddr, size, region.iova);
+
+	ASSERT_EQ(region.iova, to_iova(self->device, region.vaddr));
+
+	/* Page-table inspection only works on supported (Intel) IOMMUs. */
+	rc = iommu_mapping_get(device_bdf, region.iova, &mapping);
+	if (rc == -EOPNOTSUPP)
+		goto unmap;
+
+	/*
+	 * IOMMUFD compatibility-mode does not support huge mappings when
+	 * using VFIO_TYPE1_IOMMU.
+	 */
+	if (!strcmp(variant->iommu_mode, "iommufd_compat_type1"))
+		mapping_size = SZ_4K;
+
+	ASSERT_EQ(0, rc);
+	printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova);
+	printf("PGD: 0x%016lx\n", mapping.pgd);
+	printf("P4D: 0x%016lx\n", mapping.p4d);
+	printf("PUD: 0x%016lx\n", mapping.pud);
+	printf("PMD: 0x%016lx\n", mapping.pmd);
+	printf("PTE: 0x%016lx\n", mapping.pte);
+
+	/* The leaf entry must sit at the level implied by the mapping size. */
+	switch (mapping_size) {
+	case SZ_4K:
+		ASSERT_NE(0, mapping.pte);
+		break;
+	case SZ_2M:
+		ASSERT_EQ(0, mapping.pte);
+		ASSERT_NE(0, mapping.pmd);
+		break;
+	case SZ_1G:
+		ASSERT_EQ(0, mapping.pte);
+		ASSERT_EQ(0, mapping.pmd);
+		ASSERT_NE(0, mapping.pud);
+		break;
+	default:
+		VFIO_FAIL("Unrecognized size: 0x%lx\n", mapping_size);
+	}
+
+unmap:
+	rc = __iommu_unmap(self->iommu, &region, &unmapped);
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(unmapped, region.size);
+	printf("Unmapped IOVA 0x%lx\n", region.iova);
+	/* The IOVA must no longer translate, in libvfio or in debugfs. */
+	ASSERT_NE(0, __to_iova(self->device, region.vaddr, NULL));
+	ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping));
+
+	ASSERT_TRUE(!munmap(region.vaddr, size));
+}
+
+/* State for tests that map at the upper edge of the IOVA space. */
+FIXTURE(vfio_dma_map_limit_test) {
+	struct iommu *iommu;
+	struct vfio_pci_device *device;
+	struct dma_region region;
+	size_t mmap_size;
+};
+
+FIXTURE_VARIANT(vfio_dma_map_limit_test) {
+	const char *iommu_mode;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode)		\
+FIXTURE_VARIANT_ADD(vfio_dma_map_limit_test, _iommu_mode) {	\
+	.iommu_mode = #_iommu_mode,				\
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+FIXTURE_SETUP(vfio_dma_map_limit_test)
+{
+	struct dma_region *region = &self->region;
+	struct iommu_iova_range *ranges;
+	u64 region_size = getpagesize();
+	iova_t last_iova;
+	u32 nranges;
+
+	/*
+	 * Over-allocate mmap by double the size to provide enough backing vaddr
+	 * for overflow tests
+	 */
+	self->mmap_size = 2 * region_size;
+
+	self->iommu = iommu_init(variant->iommu_mode);
+	self->device = vfio_pci_device_init(device_bdf, self->iommu);
+	region->vaddr = mmap(NULL, self->mmap_size, PROT_READ | PROT_WRITE,
+			     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	ASSERT_NE(region->vaddr, MAP_FAILED);
+
+	/* Find the highest usable IOVA from the domain's reported ranges. */
+	ranges = iommu_iova_ranges(self->iommu, &nranges);
+	VFIO_ASSERT_NOT_NULL(ranges);
+	last_iova = ranges[nranges - 1].last;
+	free(ranges);
+
+	/* The page-aligned page that contains the last usable iova */
+	region->iova = last_iova & ~(region_size - 1);
+	region->size = region_size;
+}
+
+FIXTURE_TEARDOWN(vfio_dma_map_limit_test)
+{
+	vfio_pci_device_cleanup(self->device);
+	iommu_cleanup(self->iommu);
+	ASSERT_EQ(munmap(self->region.vaddr, self->mmap_size), 0);
+}
+
+/* Map/unmap the last page of the IOVA space with an explicit range. */
+TEST_F(vfio_dma_map_limit_test, unmap_range)
+{
+	struct dma_region *region = &self->region;
+	u64 unmapped;
+	int rc;
+
+	iommu_map(self->iommu, region);
+	ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
+
+	rc = __iommu_unmap(self->iommu, region, &unmapped);
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(unmapped, region->size);
+}
+
+/* Same, but tear down via the unmap-all path. */
+TEST_F(vfio_dma_map_limit_test, unmap_all)
+{
+	struct dma_region *region = &self->region;
+	u64 unmapped;
+	int rc;
+
+	iommu_map(self->iommu, region);
+	ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
+
+	rc = __iommu_unmap_all(self->iommu, &unmapped);
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(unmapped, region->size);
+}
+
+/* iova + size wraps past the top of the address space: both map and unmap
+ * must fail with -EOVERFLOW rather than wrapping silently. */
+TEST_F(vfio_dma_map_limit_test, overflow)
+{
+	struct dma_region *region = &self->region;
+	int rc;
+
+	region->iova = ~(iova_t)0 & ~(region->size - 1);
+	region->size = self->mmap_size;
+
+	rc = __iommu_map(self->iommu, region);
+	ASSERT_EQ(rc, -EOVERFLOW);
+
+	rc = __iommu_unmap(self->iommu, region, NULL);
+	ASSERT_EQ(rc, -EOVERFLOW);
+}
+
+/* Pull the target device BDF off the command line, then run the harness. */
+int main(int argc, char *argv[])
+{
+	device_bdf = vfio_selftests_get_bdf(&argc, argv);
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
new file mode 100644
index 000000000000..17017ed3beac
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/types.h>
+#include <linux/limits.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <libvfio.h>
+#include "kselftest_harness.h"
+
+static const char iommu_dev_path[] = "/dev/iommu";
+static const char *cdev_path;
+
+/*
+ * Thin raw-ioctl wrappers. Unlike the libvfio helpers these return the
+ * ioctl result instead of asserting, so tests can check failure paths.
+ */
+static int vfio_device_bind_iommufd_ioctl(int cdev_fd, int iommufd)
+{
+	struct vfio_device_bind_iommufd bind_args = {
+		.argsz = sizeof(bind_args),
+		.iommufd = iommufd,
+	};
+
+	return ioctl(cdev_fd, VFIO_DEVICE_BIND_IOMMUFD, &bind_args);
+}
+
+static int vfio_device_get_info_ioctl(int cdev_fd)
+{
+	struct vfio_device_info info_args = { .argsz = sizeof(info_args) };
+
+	return ioctl(cdev_fd, VFIO_DEVICE_GET_INFO, &info_args);
+}
+
+/* Allocates a fresh IOAS; the new id is returned in alloc_args->out_ioas_id. */
+static int vfio_device_ioas_alloc_ioctl(int iommufd, struct iommu_ioas_alloc *alloc_args)
+{
+	*alloc_args = (struct iommu_ioas_alloc){
+		.size = sizeof(struct iommu_ioas_alloc),
+	};
+
+	return ioctl(iommufd, IOMMU_IOAS_ALLOC, alloc_args);
+}
+
+static int vfio_device_attach_iommufd_pt_ioctl(int cdev_fd, u32 pt_id)
+{
+	struct vfio_device_attach_iommufd_pt attach_args = {
+		.argsz = sizeof(attach_args),
+		.pt_id = pt_id,
+	};
+
+	return ioctl(cdev_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_args);
+}
+
+static int vfio_device_detach_iommufd_pt_ioctl(int cdev_fd)
+{
+	struct vfio_device_detach_iommufd_pt detach_args = {
+		.argsz = sizeof(detach_args),
+	};
+
+	return ioctl(cdev_fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_args);
+}
+
+/* Each test gets a freshly opened device cdev fd and iommufd. */
+FIXTURE(vfio_cdev) {
+	int cdev_fd;
+	int iommufd;
+};
+
+FIXTURE_SETUP(vfio_cdev)
+{
+	ASSERT_LE(0, (self->cdev_fd = open(cdev_path, O_RDWR, 0)));
+	ASSERT_LE(0, (self->iommufd = open(iommu_dev_path, O_RDWR, 0)));
+}
+
+FIXTURE_TEARDOWN(vfio_cdev)
+{
+	ASSERT_EQ(0, close(self->cdev_fd));
+	ASSERT_EQ(0, close(self->iommufd));
+}
+
+/* Binding enables device access: GET_INFO must work afterwards. */
+TEST_F(vfio_cdev, bind)
+{
+	ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+	ASSERT_EQ(0, vfio_device_get_info_ioctl(self->cdev_fd));
+}
+
+/* ...and must fail before any bind. */
+TEST_F(vfio_cdev, get_info_without_bind_fails)
+{
+	ASSERT_NE(0, vfio_device_get_info_ioctl(self->cdev_fd));
+}
+
+/* -2 is never a valid fd (and avoids -1's "no fd" meaning). */
+TEST_F(vfio_cdev, bind_bad_iommufd_fails)
+{
+	ASSERT_NE(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, -2));
+}
+
+/* A device can be bound to an iommufd at most once. */
+TEST_F(vfio_cdev, repeated_bind_fails)
+{
+	ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+	ASSERT_NE(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+}
+
+/* Full happy path: bind, allocate an IOAS, attach to it, then detach. */
+/* Renamed from "attach_detatch_pt": fix typo in the test name. */
+TEST_F(vfio_cdev, attach_detach_pt)
+{
+	struct iommu_ioas_alloc alloc_args;
+
+	ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+	ASSERT_EQ(0, vfio_device_ioas_alloc_ioctl(self->iommufd, &alloc_args));
+	ASSERT_EQ(0, vfio_device_attach_iommufd_pt_ioctl(self->cdev_fd, alloc_args.out_ioas_id));
+	ASSERT_EQ(0, vfio_device_detach_iommufd_pt_ioctl(self->cdev_fd));
+}
+
+/* Attaching to a page table id that was never allocated must fail. */
+TEST_F(vfio_cdev, attach_invalid_pt_fails)
+{
+	ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+	ASSERT_NE(0, vfio_device_attach_iommufd_pt_ioctl(self->cdev_fd, UINT32_MAX));
+}
+
+/* Resolve the device's cdev path once; it is process-lifetime state
+ * (intentionally never freed). */
+int main(int argc, char *argv[])
+{
+	const char *device_bdf = vfio_selftests_get_bdf(&argc, argv);
+
+	cdev_path = vfio_pci_get_cdev_path(device_bdf);
+	printf("Using cdev device %s\n", cdev_path);
+
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c b/tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c
new file mode 100644
index 000000000000..33b0c31fe2ed
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <pthread.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/sizes.h>
+#include <linux/time64.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+#include "../kselftest_harness.h"
+
+static char **device_bdfs;
+static int nr_devices;
+
+/*
+ * Per-thread inputs and results: which device to initialize, the shared
+ * iommu and start barrier, and the measured init start/end timestamps.
+ */
+struct thread_args {
+	struct iommu *iommu;
+	int device_index;
+	struct timespec start;
+	struct timespec end;
+	pthread_barrier_t *barrier;
+};
+
+FIXTURE(vfio_pci_device_init_perf_test) {
+	pthread_t *threads;
+	pthread_barrier_t barrier;
+	struct thread_args *thread_args;
+	struct iommu *iommu;
+};
+
+FIXTURE_VARIANT(vfio_pci_device_init_perf_test) {
+	const char *iommu_mode;
+};
+
+/* One variant per IOMMU mode, as in the other vfio selftests. */
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode)		       \
+FIXTURE_VARIANT_ADD(vfio_pci_device_init_perf_test, _iommu_mode) {     \
+	.iommu_mode = #_iommu_mode,				       \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+/* Undef after use, consistent with the sibling test files. */
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+/*
+ * Allocate one thread + args slot per device and a barrier to line up the
+ * threads' timing windows. Allocation and barrier failures are fatal.
+ */
+FIXTURE_SETUP(vfio_pci_device_init_perf_test)
+{
+	int i;
+
+	self->iommu = iommu_init(variant->iommu_mode);
+	self->threads = calloc(nr_devices, sizeof(self->threads[0]));
+	VFIO_ASSERT_NOT_NULL(self->threads);
+	self->thread_args = calloc(nr_devices, sizeof(self->thread_args[0]));
+	VFIO_ASSERT_NOT_NULL(self->thread_args);
+
+	VFIO_ASSERT_EQ(pthread_barrier_init(&self->barrier, NULL, nr_devices), 0);
+
+	for (i = 0; i < nr_devices; i++) {
+		self->thread_args[i].iommu = self->iommu;
+		self->thread_args[i].barrier = &self->barrier;
+		self->thread_args[i].device_index = i;
+	}
+}
+
+/* Release everything FIXTURE_SETUP created, including the barrier. */
+FIXTURE_TEARDOWN(vfio_pci_device_init_perf_test)
+{
+	/* Was initialized in SETUP; destroy to release barrier resources. */
+	pthread_barrier_destroy(&self->barrier);
+	iommu_cleanup(self->iommu);
+	free(self->threads);
+	free(self->thread_args);
+}
+
+/* Flatten a timespec to signed nanoseconds for easy arithmetic. */
+static s64 to_ns(struct timespec ts)
+{
+	return (s64)ts.tv_nsec + NSEC_PER_SEC * (s64)ts.tv_sec;
+}
+
+/* Inverse of to_ns(). */
+static struct timespec to_timespec(s64 ns)
+{
+	struct timespec ts = {
+		.tv_nsec = ns % NSEC_PER_SEC,
+		.tv_sec = ns / NSEC_PER_SEC,
+	};
+
+	return ts;
+}
+
+/* a - b */
+static struct timespec timespec_sub(struct timespec a, struct timespec b)
+{
+	return to_ns(a) - to_ns(b) < 0 ? timespec_min(a, b) : to_timespec(to_ns(a) - to_ns(b));
+}
+
+static struct timespec timespec_min(struct timespec a, struct timespec b)
+{
+	return to_ns(a) < to_ns(b) ? a : b;
+}
+
+static struct timespec timespec_max(struct timespec a, struct timespec b)
+{
+	return to_ns(a) > to_ns(b) ? a : b;
+}
+
+/*
+ * Worker: wait at the barrier so all threads start together, time a single
+ * vfio_pci_device_init(), then wait again so cleanup (which serializes on
+ * kernel locks) does not pollute another thread's measurement window.
+ */
+static void *thread_main(void *__args)
+{
+	struct thread_args *args = __args;
+	struct vfio_pci_device *device;
+
+	pthread_barrier_wait(args->barrier);
+
+	clock_gettime(CLOCK_MONOTONIC, &args->start);
+	device = vfio_pci_device_init(device_bdfs[args->device_index], args->iommu);
+	clock_gettime(CLOCK_MONOTONIC, &args->end);
+
+	pthread_barrier_wait(args->barrier);
+
+	vfio_pci_device_cleanup(device);
+	return NULL;
+}
+
+/*
+ * Initialize all devices in parallel and report wall time plus min/max/avg
+ * per-device init latency.
+ */
+TEST_F(vfio_pci_device_init_perf_test, init)
+{
+	struct timespec start = to_timespec(INT64_MAX), end = {};
+	struct timespec min = to_timespec(INT64_MAX);
+	struct timespec max = {};
+	struct timespec avg = {};
+	struct timespec wall_time;
+	s64 thread_ns = 0;
+	int i;
+
+	/* NOTE(review): pthread_create() return value is unchecked. */
+	for (i = 0; i < nr_devices; i++) {
+		pthread_create(&self->threads[i], NULL, thread_main,
+			       &self->thread_args[i]);
+	}
+
+	for (i = 0; i < nr_devices; i++) {
+		struct thread_args *args = &self->thread_args[i];
+		struct timespec init_time;
+
+		pthread_join(self->threads[i], NULL);
+
+		/* Wall time spans the earliest start to the latest end. */
+		start = timespec_min(start, args->start);
+		end = timespec_max(end, args->end);
+
+		init_time = timespec_sub(args->end, args->start);
+		min = timespec_min(min, init_time);
+		max = timespec_max(max, init_time);
+		thread_ns += to_ns(init_time);
+	}
+
+	avg = to_timespec(thread_ns / nr_devices);
+	wall_time = timespec_sub(end, start);
+
+	printf("Wall time: %lu.%09lus\n",
+	       wall_time.tv_sec, wall_time.tv_nsec);
+	printf("Min init time (per device): %lu.%09lus\n",
+	       min.tv_sec, min.tv_nsec);
+	printf("Max init time (per device): %lu.%09lus\n",
+	       max.tv_sec, max.tv_nsec);
+	printf("Avg init time (per device): %lu.%09lus\n",
+	       avg.tv_sec, avg.tv_nsec);
+}
+
+/* Collect all device BDFs from the command line, then run the harness. */
+int main(int argc, char *argv[])
+{
+	int i;
+
+	device_bdfs = vfio_selftests_get_bdfs(&argc, argv, &nr_devices);
+
+	printf("Testing parallel initialization of %d devices:\n", nr_devices);
+	for (i = 0; i < nr_devices; i++)
+		printf("  %s\n", device_bdfs[i]);
+
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_pci_device_test.c b/tools/testing/selftests/vfio/vfio_pci_device_test.c
new file mode 100644
index 000000000000..7c0fe8ce3a61
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_pci_device_test.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdlib.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/limits.h>
+#include <linux/pci_regs.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+#include "kselftest_harness.h"
+
+static const char *device_bdf;
+
+/*
+ * Limit the number of MSIs enabled/disabled by the test regardless of the
+ * number of MSIs the device itself supports, e.g. to avoid hitting IRTE limits.
+ */
+#define MAX_TEST_MSI 16U
+
+/* Basic device tests: one device attached to a default-mode IOMMU. */
+FIXTURE(vfio_pci_device_test) {
+	struct iommu *iommu;
+	struct vfio_pci_device *device;
+};
+
+FIXTURE_SETUP(vfio_pci_device_test)
+{
+	self->iommu = iommu_init(default_iommu_mode);
+	self->device = vfio_pci_device_init(device_bdf, self->iommu);
+}
+
+FIXTURE_TEARDOWN(vfio_pci_device_test)
+{
+	vfio_pci_device_cleanup(self->device);
+	iommu_cleanup(self->iommu);
+}
+
+/*
+ * Read a PCI id (e.g. "vendor"/"device") for device_bdf from sysfs and
+ * return it as a u16. Statement expression; asserts on any I/O failure.
+ * __buf is zero-initialized and read() is capped at size - 1 so the
+ * buffer handed to strtoul() is always NUL-terminated.
+ */
+#define read_pci_id_from_sysfs(_file) ({							\
+	char __sysfs_path[PATH_MAX];								\
+	char __buf[32] = {};									\
+	int __fd;										\
+												\
+	snprintf(__sysfs_path, PATH_MAX, "/sys/bus/pci/devices/%s/%s", device_bdf, _file);	\
+	ASSERT_GT((__fd = open(__sysfs_path, O_RDONLY)), 0);					\
+	ASSERT_GT(read(__fd, __buf, ARRAY_SIZE(__buf) - 1), 0);					\
+	ASSERT_EQ(0, close(__fd));								\
+	(u16)strtoul(__buf, NULL, 0);								\
+})
+
+/*
+ * Cross-check config-space reads against sysfs, then toggle Bus Master in
+ * the COMMAND register and verify the writes stick.
+ */
+TEST_F(vfio_pci_device_test, config_space_read_write)
+{
+	u16 vendor, device;
+	u16 command;
+
+	/* Check that Vendor and Device match what the kernel reports. */
+	vendor = read_pci_id_from_sysfs("vendor");
+	device = read_pci_id_from_sysfs("device");
+	ASSERT_TRUE(vfio_pci_device_match(self->device, vendor, device));
+
+	printf("Vendor: %04x, Device: %04x\n", vendor, device);
+
+	/* vfio-pci leaves Bus Mastering disabled on a fresh open. */
+	command = vfio_pci_config_readw(self->device, PCI_COMMAND);
+	ASSERT_FALSE(command & PCI_COMMAND_MASTER);
+
+	vfio_pci_config_writew(self->device, PCI_COMMAND, command | PCI_COMMAND_MASTER);
+	command = vfio_pci_config_readw(self->device, PCI_COMMAND);
+	ASSERT_TRUE(command & PCI_COMMAND_MASTER);
+	printf("Enabled Bus Mastering (command: %04x)\n", command);
+
+	vfio_pci_config_writew(self->device, PCI_COMMAND, command & ~PCI_COMMAND_MASTER);
+	command = vfio_pci_config_readw(self->device, PCI_COMMAND);
+	ASSERT_FALSE(command & PCI_COMMAND_MASTER);
+	printf("Disabled Bus Mastering (command: %04x)\n", command);
+}
+
+/* Every mmap-capable BAR must already be mapped; the rest must not be. */
+TEST_F(vfio_pci_device_test, validate_bars)
+{
+	struct vfio_pci_bar *bar;
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		bar = &self->device->bars[i];
+
+		if (!(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) {
+			printf("BAR %d does not support mmap()\n", i);
+			ASSERT_EQ(NULL, bar->vaddr);
+			continue;
+		}
+
+		/*
+		 * BARs that support mmap() should be automatically mapped by
+		 * vfio_pci_device_init().
+		 */
+		ASSERT_NE(NULL, bar->vaddr);
+		ASSERT_NE(0, bar->info.size);
+		printf("BAR %d mapped at %p (size 0x%llx)\n", i, bar->vaddr, bar->info.size);
+	}
+}
+
+/* Interrupt tests, parameterized over the VFIO IRQ index (MSI vs MSI-x). */
+FIXTURE(vfio_pci_irq_test) {
+	struct iommu *iommu;
+	struct vfio_pci_device *device;
+};
+
+FIXTURE_VARIANT(vfio_pci_irq_test) {
+	int irq_index;
+};
+
+FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msi) {
+	.irq_index = VFIO_PCI_MSI_IRQ_INDEX,
+};
+
+FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msix) {
+	.irq_index = VFIO_PCI_MSIX_IRQ_INDEX,
+};
+
+FIXTURE_SETUP(vfio_pci_irq_test)
+{
+	self->iommu = iommu_init(default_iommu_mode);
+	self->device = vfio_pci_device_init(device_bdf, self->iommu);
+}
+
+FIXTURE_TEARDOWN(vfio_pci_irq_test)
+{
+	vfio_pci_device_cleanup(self->device);
+	iommu_cleanup(self->iommu);
+}
+
+/*
+ * Enable up to MAX_TEST_MSI vectors, loopback-trigger each one via
+ * VFIO_DEVICE_SET_IRQS, and verify exactly one event lands on the
+ * corresponding eventfd. Skips when the device lacks MSI/MSI-x.
+ */
+TEST_F(vfio_pci_irq_test, enable_trigger_disable)
+{
+	bool msix = variant->irq_index == VFIO_PCI_MSIX_IRQ_INDEX;
+	int msi_eventfd;
+	u32 count;
+	u64 value;
+	int i;
+
+	if (msix)
+		count = self->device->msix_info.count;
+	else
+		count = self->device->msi_info.count;
+
+	count = min(count, MAX_TEST_MSI);
+
+	if (!count)
+		SKIP(return, "MSI%s: not supported\n", msix ? "-x" : "");
+
+	vfio_pci_irq_enable(self->device, variant->irq_index, 0, count);
+	printf("MSI%s: enabled %d interrupts\n", msix ? "-x" : "", count);
+
+	for (i = 0; i < count; i++) {
+		msi_eventfd = self->device->msi_eventfds[i];
+
+		/* Non-blocking so an un-fired eventfd reads EAGAIN, not hangs. */
+		fcntl_set_nonblock(msi_eventfd);
+		ASSERT_EQ(-1, read(msi_eventfd, &value, 8));
+		ASSERT_EQ(EAGAIN, errno);
+
+		vfio_pci_irq_trigger(self->device, variant->irq_index, i);
+
+		/* eventfd reads are always 8 bytes; counter must be exactly 1. */
+		ASSERT_EQ(8, read(msi_eventfd, &value, 8));
+		ASSERT_EQ(1, value);
+	}
+
+	vfio_pci_irq_disable(self->device, variant->irq_index);
+}
+
+/* Exercise VFIO_DEVICE_RESET when the device advertises reset support. */
+TEST_F(vfio_pci_device_test, reset)
+{
+	if (!(self->device->info.flags & VFIO_DEVICE_FLAGS_RESET))
+		SKIP(return, "Device does not support reset\n");
+
+	vfio_pci_device_reset(self->device);
+}
+
+int main(int argc, char *argv[])
+{
+	device_bdf = vfio_selftests_get_bdf(&argc, argv);
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
new file mode 100644
index 000000000000..afa0480ddd9b
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+#include "kselftest_harness.h"
+
+static const char *device_bdf;
+
+/*
+ * Assert that no MSI has been delivered on @_eventfd: a non-blocking read of
+ * an idle eventfd must fail with EAGAIN.  Caller must have set O_NONBLOCK.
+ */
+#define ASSERT_NO_MSI(_eventfd) do { \
+ u64 __value; \
+ \
+ ASSERT_EQ(-1, read(_eventfd, &__value, 8)); \
+ ASSERT_EQ(EAGAIN, errno); \
+} while (0)
+
+/*
+ * Allocate @size bytes of anonymous memory, carve out an IOVA range of the
+ * same size, and map the region into the IOMMU for DMA.  Aborts (via
+ * VFIO_ASSERT_*) on any failure.  Paired with region_teardown().
+ */
+static void region_setup(struct iommu *iommu,
+ struct iova_allocator *iova_allocator,
+ struct dma_region *region, u64 size)
+{
+ const int flags = MAP_SHARED | MAP_ANONYMOUS;
+ const int prot = PROT_READ | PROT_WRITE;
+ void *vaddr;
+
+ vaddr = mmap(NULL, size, prot, flags, -1, 0);
+ VFIO_ASSERT_NE(vaddr, MAP_FAILED);
+
+ region->vaddr = vaddr;
+ region->iova = iova_allocator_alloc(iova_allocator, size);
+ region->size = size;
+
+ iommu_map(iommu, region);
+}
+
+/* Undo region_setup(): unmap from the IOMMU, then release the backing pages. */
+static void region_teardown(struct iommu *iommu, struct dma_region *region)
+{
+ iommu_unmap(iommu, region);
+ VFIO_ASSERT_EQ(munmap(region->vaddr, region->size), 0);
+}
+
+/* Per-test state for driver-based DMA tests. */
+FIXTURE(vfio_pci_driver_test) {
+ struct iommu *iommu;
+ struct vfio_pci_device *device;
+ struct iova_allocator *iova_allocator;
+ struct dma_region memcpy_region;  /* 1GiB buffer holding src and dst */
+ void *vaddr;
+ int msi_fd;                       /* eventfd for the driver's MSI vector */
+
+ u64 size;                         /* bytes per memcpy operation */
+ void *src;
+ void *dst;
+ iova_t src_iova;
+ iova_t dst_iova;
+ iova_t unmapped_iova;             /* valid-looking IOVA with no mapping */
+};
+
+/* One fixture variant per supported IOMMU mode (vfio type1, iommufd, ...). */
+FIXTURE_VARIANT(vfio_pci_driver_test) {
+ const char *iommu_mode;
+};
+
+/* Stamp out a variant whose name and iommu_mode string are both _iommu_mode. */
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \
+FIXTURE_VARIANT_ADD(vfio_pci_driver_test, _iommu_mode) { \
+ .iommu_mode = #_iommu_mode, \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+/*
+ * Open the device under the variant's IOMMU mode, map a 1GiB memcpy buffer
+ * and a 2MiB driver-private region, bind the selftests driver, and derive
+ * src/dst sub-buffers plus a deliberately unmapped IOVA for error tests.
+ */
+FIXTURE_SETUP(vfio_pci_driver_test)
+{
+ struct vfio_pci_driver *driver;
+
+ self->iommu = iommu_init(variant->iommu_mode);
+ self->device = vfio_pci_device_init(device_bdf, self->iommu);
+ self->iova_allocator = iova_allocator_init(self->iommu);
+
+ driver = &self->device->driver;
+
+ region_setup(self->iommu, self->iova_allocator, &self->memcpy_region, SZ_1G);
+ region_setup(self->iommu, self->iova_allocator, &driver->region, SZ_2M);
+
+ /* Any IOVA that doesn't overlap memcpy_region and driver->region. */
+ self->unmapped_iova = iova_allocator_alloc(self->iova_allocator, SZ_1G);
+
+ vfio_pci_driver_init(self->device);
+ /* The driver reports which MSI vector it signals on completion/error. */
+ self->msi_fd = self->device->msi_eventfds[driver->msi];
+
+ /*
+ * Use the maximum size supported by the device for memcpy operations,
+ * slimmed down to fit into the memcpy region (divided by 2 so src and
+ * dst regions do not overlap).
+ */
+ self->size = self->device->driver.max_memcpy_size;
+ self->size = min(self->size, self->memcpy_region.size / 2);
+
+ self->src = self->memcpy_region.vaddr;
+ self->dst = self->src + self->size;
+
+ self->src_iova = to_iova(self->device, self->src);
+ self->dst_iova = to_iova(self->device, self->dst);
+}
+
+/* Tear down in reverse order of setup: driver, DMA regions, allocator,
+ * device, then the IOMMU container.
+ */
+FIXTURE_TEARDOWN(vfio_pci_driver_test)
+{
+ struct vfio_pci_driver *driver = &self->device->driver;
+
+ vfio_pci_driver_remove(self->device);
+
+ region_teardown(self->iommu, &self->memcpy_region);
+ region_teardown(self->iommu, &driver->region);
+
+ iova_allocator_cleanup(self->iova_allocator);
+ vfio_pci_device_cleanup(self->device);
+ iommu_cleanup(self->iommu);
+}
+
+/*
+ * Repeatedly unbind and rebind the selftests driver to catch state leaked
+ * across remove/init cycles.  The fixture leaves the driver bound, so the
+ * loop removes first and ends with the driver re-initialized for teardown.
+ */
+TEST_F(vfio_pci_driver_test, init_remove)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ vfio_pci_driver_remove(self->device);
+ vfio_pci_driver_init(self->device);
+ }
+}
+
+/*
+ * A device-driven memcpy between mapped IOVAs must succeed, copy the data
+ * exactly, and raise no error MSI.
+ */
+TEST_F(vfio_pci_driver_test, memcpy_success)
+{
+ fcntl_set_nonblock(self->msi_fd);
+
+ /* Distinct fill patterns so a no-op copy cannot pass the memcmp. */
+ memset(self->src, 'x', self->size);
+ memset(self->dst, 'y', self->size);
+
+ ASSERT_EQ(0, vfio_pci_driver_memcpy(self->device,
+ self->src_iova,
+ self->dst_iova,
+ self->size));
+
+ ASSERT_EQ(0, memcmp(self->src, self->dst, self->size));
+ ASSERT_NO_MSI(self->msi_fd);
+}
+
+/*
+ * DMA-read from an unmapped source IOVA.  The device may or may not report
+ * the fault, but it must never signal an interrupt for it.
+ */
+TEST_F(vfio_pci_driver_test, memcpy_from_unmapped_iova)
+{
+ fcntl_set_nonblock(self->msi_fd);
+
+ /*
+ * Ignore the return value since not all devices will detect and report
+ * accesses to unmapped IOVAs as errors.
+ */
+ vfio_pci_driver_memcpy(self->device, self->unmapped_iova,
+ self->dst_iova, self->size);
+
+ ASSERT_NO_MSI(self->msi_fd);
+}
+
+/*
+ * DMA-write to an unmapped destination IOVA.  Mirror of
+ * memcpy_from_unmapped_iova: the fault may go undetected, but no MSI may fire.
+ */
+TEST_F(vfio_pci_driver_test, memcpy_to_unmapped_iova)
+{
+ fcntl_set_nonblock(self->msi_fd);
+
+ /*
+ * Ignore the return value since not all devices will detect and report
+ * accesses to unmapped IOVAs as errors.
+ */
+ vfio_pci_driver_memcpy(self->device, self->src_iova,
+ self->unmapped_iova, self->size);
+
+ ASSERT_NO_MSI(self->msi_fd);
+}
+
+/*
+ * Ask the driver to fire its MSI and verify exactly one interrupt arrives.
+ * The blocking 8-byte eventfd read returns the event count; expect 1.
+ */
+TEST_F(vfio_pci_driver_test, send_msi)
+{
+ u64 value;
+
+ vfio_pci_driver_send_msi(self->device);
+ ASSERT_EQ(8, read(self->msi_fd, &value, 8));
+ ASSERT_EQ(1, value);
+}
+
+/*
+ * Interleave successful memcpys, faulting memcpys (unmapped source), and
+ * MSI sends to check the driver recovers cleanly between operations.  The
+ * eventfd read blocks until the requested MSI lands, and value == 1 proves
+ * the faulting memcpy did not also raise an interrupt.
+ */
+TEST_F(vfio_pci_driver_test, mix_and_match)
+{
+ u64 value;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ memset(self->src, 'x', self->size);
+ memset(self->dst, 'y', self->size);
+
+ ASSERT_EQ(0, vfio_pci_driver_memcpy(self->device,
+ self->src_iova,
+ self->dst_iova,
+ self->size));
+
+ ASSERT_EQ(0, memcmp(self->src, self->dst, self->size));
+
+ /* Best-effort faulting copy; return value intentionally ignored. */
+ vfio_pci_driver_memcpy(self->device,
+ self->unmapped_iova,
+ self->dst_iova,
+ self->size);
+
+ vfio_pci_driver_send_msi(self->device);
+ ASSERT_EQ(8, read(self->msi_fd, &value, 8));
+ ASSERT_EQ(1, value);
+ }
+}
+
+/*
+ * Stress test: queue a large batch of memcpy operations asynchronously and
+ * wait for completion.  Bounded at 60s by the harness timeout and capped by
+ * the driver's max_memcpy_count.  No error MSI may fire.
+ */
+TEST_F_TIMEOUT(vfio_pci_driver_test, memcpy_storm, 60)
+{
+ struct vfio_pci_driver *driver = &self->device->driver;
+ u64 total_size;
+ u64 count;
+
+ fcntl_set_nonblock(self->msi_fd);
+
+ /*
+ * Perform up to 250GiB worth of DMA reads and writes across several
+ * memcpy operations. Some devices can support even more but the test
+ * will take too long.
+ */
+ total_size = 250UL * SZ_1G;
+ count = min(total_size / self->size, driver->max_memcpy_count);
+
+ /* NOTE(review): count/size are u64; %lu/%lx assume 64-bit long — confirm. */
+ printf("Kicking off %lu memcpys of size 0x%lx\n", count, self->size);
+ vfio_pci_driver_memcpy_start(self->device,
+ self->src_iova,
+ self->dst_iova,
+ self->size, count);
+
+ ASSERT_EQ(0, vfio_pci_driver_memcpy_wait(self->device));
+ ASSERT_NO_MSI(self->msi_fd);
+}
+
+/*
+ * Return true if libvfio has a selftests driver for the device at @bdf.
+ * Probes by briefly opening the device under the default IOMMU mode and
+ * checking whether driver ops were bound, then tears everything down.
+ */
+static bool device_has_selftests_driver(const char *bdf)
+{
+ struct vfio_pci_device *device;
+ struct iommu *iommu;
+ bool has_driver;
+
+ iommu = iommu_init(default_iommu_mode);
+ /* Use the @bdf parameter, not the global device_bdf the original read. */
+ device = vfio_pci_device_init(bdf, iommu);
+
+ has_driver = !!device->driver.ops;
+
+ vfio_pci_device_cleanup(device);
+ iommu_cleanup(iommu);
+
+ return has_driver;
+}
+
+/*
+ * Parse the device BDF, skip the whole suite (KSFT_SKIP) when no selftests
+ * driver exists for the device, otherwise run the harness.
+ */
+int main(int argc, char *argv[])
+{
+ device_bdf = vfio_selftests_get_bdf(&argc, argv);
+
+ if (!device_has_selftests_driver(device_bdf)) {
+ fprintf(stderr, "No driver found for device %s\n", device_bdf);
+ return KSFT_SKIP;
+ }
+
+ return test_harness_run(argc, argv);
+}