Diffstat (limited to 'tools/testing/selftests/vfio/lib')
-rw-r--r--  tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c         416
l---------  tools/testing/selftests/vfio/lib/drivers/dsa/registers.h     1
l---------  tools/testing/selftests/vfio/lib/drivers/ioat/hw.h           1
-rw-r--r--  tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c       235
l---------  tools/testing/selftests/vfio/lib/drivers/ioat/registers.h    1
-rw-r--r--  tools/testing/selftests/vfio/lib/include/vfio_util.h       295
-rw-r--r--  tools/testing/selftests/vfio/lib/libvfio.mk                 24
-rw-r--r--  tools/testing/selftests/vfio/lib/vfio_pci_device.c         594
-rw-r--r--  tools/testing/selftests/vfio/lib/vfio_pci_driver.c         126
9 files changed, 1693 insertions, 0 deletions
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
new file mode 100644
index 000000000000..0ca2cbc2a316
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <unistd.h>
+
+#include <linux/bits.h>
+#include <linux/errno.h>
+#include <linux/idxd.h>
+#include <linux/io.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+
+#include <vfio_util.h>
+
+#include "registers.h"
+
+/* Vectors 1+ are available for work queue completion interrupts. */
+#define MSIX_VECTOR 1
+
+struct dsa_state {
+ /* Descriptors for copy and batch operations. */
+ struct dsa_hw_desc batch[32];
+ struct dsa_hw_desc copy[1024];
+
+ /* Completion records for copy and batch operations. */
+ struct dsa_completion_record copy_completion;
+ struct dsa_completion_record batch_completion;
+
+ /* Cached device registers (and derived data) for easy access */
+ union gen_cap_reg gen_cap;
+ union wq_cap_reg wq_cap;
+ union group_cap_reg group_cap;
+ union engine_cap_reg engine_cap;
+ union offsets_reg table_offsets;
+ void *wqcfg_table;
+ void *grpcfg_table;
+ u64 max_batches;
+ u64 max_copies_per_batch;
+
+ /* The number of ongoing memcpy operations. */
+ u64 memcpy_count;
+
+ /* Buffers used by dsa_send_msi() to generate an interrupt */
+ u64 send_msi_src;
+ u64 send_msi_dst;
+};
+
+static inline struct dsa_state *to_dsa_state(struct vfio_pci_device *device)
+{
+ return device->driver.region.vaddr;
+}
+
+static bool dsa_int_handle_request_required(struct vfio_pci_device *device)
+{
+ void *bar0 = device->bars[0].vaddr;
+ union gen_cap_reg gen_cap;
+ u32 cmd_cap;
+
+ gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
+ if (!gen_cap.cmd_cap)
+ return false;
+
+ cmd_cap = readl(bar0 + IDXD_CMDCAP_OFFSET);
+ return (cmd_cap >> IDXD_CMD_REQUEST_INT_HANDLE) & 1;
+}
+
+static int dsa_probe(struct vfio_pci_device *device)
+{
+ if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_DSA_SPR0))
+ return -EINVAL;
+
+ if (dsa_int_handle_request_required(device)) {
+ printf("Device requires requesting interrupt handles\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void dsa_check_sw_err(struct vfio_pci_device *device)
+{
+ void *reg = device->bars[0].vaddr + IDXD_SWERR_OFFSET;
+ union sw_err_reg err = {};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err.bits); i++) {
+ err.bits[i] = readq(reg + offsetof(union sw_err_reg, bits[i]));
+
+ /* No errors */
+ if (i == 0 && !err.valid)
+ return;
+ }
+
+ fprintf(stderr, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
+ err.bits[0], err.bits[1], err.bits[2], err.bits[3]);
+
+ fprintf(stderr, " valid: 0x%x\n", err.valid);
+ fprintf(stderr, " overflow: 0x%x\n", err.overflow);
+ fprintf(stderr, " desc_valid: 0x%x\n", err.desc_valid);
+ fprintf(stderr, " wq_idx_valid: 0x%x\n", err.wq_idx_valid);
+ fprintf(stderr, " batch: 0x%x\n", err.batch);
+ fprintf(stderr, " fault_rw: 0x%x\n", err.fault_rw);
+ fprintf(stderr, " priv: 0x%x\n", err.priv);
+ fprintf(stderr, " error: 0x%x\n", err.error);
+ fprintf(stderr, " wq_idx: 0x%x\n", err.wq_idx);
+ fprintf(stderr, " operation: 0x%x\n", err.operation);
+ fprintf(stderr, " pasid: 0x%x\n", err.pasid);
+ fprintf(stderr, " batch_idx: 0x%x\n", err.batch_idx);
+ fprintf(stderr, " invalid_flags: 0x%x\n", err.invalid_flags);
+ fprintf(stderr, " fault_addr: 0x%lx\n", err.fault_addr);
+
+ VFIO_FAIL("Software Error Detected!\n");
+}
+
+static void dsa_command(struct vfio_pci_device *device, u32 cmd)
+{
+ union idxd_command_reg cmd_reg = { .cmd = cmd };
+ u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
+ void *bar0 = device->bars[0].vaddr;
+ u32 status;
+ u8 err;
+
+ writel(cmd_reg.bits, bar0 + IDXD_CMD_OFFSET);
+
+ for (;;) {
+ dsa_check_sw_err(device);
+
+ status = readl(bar0 + IDXD_CMDSTS_OFFSET);
+ if (!(status & IDXD_CMDSTS_ACTIVE))
+ break;
+
+ VFIO_ASSERT_GT(--attempts, 0);
+ usleep(sleep_ms * 1000);
+ }
+
+ err = status & IDXD_CMDSTS_ERR_MASK;
+ VFIO_ASSERT_EQ(err, 0, "Error issuing command 0x%x: 0x%x\n", cmd, err);
+}
+
+static void dsa_wq_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ union wq_cap_reg wq_cap = dsa->wq_cap;
+ union wqcfg wqcfg;
+ u64 wqcfg_size;
+ int i;
+
+ VFIO_ASSERT_GT((u32)wq_cap.num_wqs, 0);
+
+ wqcfg = (union wqcfg) {
+ .wq_size = wq_cap.total_wq_size,
+ .mode = 1,
+ .priority = 1,
+ /*
+ * Disable Address Translation Service (if enabled) so that VFIO
+ * selftests using this driver can generate I/O page faults.
+ */
+ .wq_ats_disable = wq_cap.wq_ats_support,
+ .max_xfer_shift = dsa->gen_cap.max_xfer_shift,
+ .max_batch_shift = dsa->gen_cap.max_batch_shift,
+ .op_config[0] = BIT(DSA_OPCODE_MEMMOVE) | BIT(DSA_OPCODE_BATCH),
+ };
+
+ wqcfg_size = 1UL << (wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
+
+ for (i = 0; i < wqcfg_size / sizeof(wqcfg.bits[0]); i++)
+ writel(wqcfg.bits[i], dsa->wqcfg_table + offsetof(union wqcfg, bits[i]));
+}
+
+static void dsa_group_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ union group_cap_reg group_cap = dsa->group_cap;
+ union engine_cap_reg engine_cap = dsa->engine_cap;
+
+ VFIO_ASSERT_GT((u32)group_cap.num_groups, 0);
+ VFIO_ASSERT_GT((u32)engine_cap.num_engines, 0);
+
+ /* Assign work queue 0 and engine 0 to group 0 */
+ writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, wqs[0]));
+ writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, engines));
+}
+
+static void dsa_register_cache_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ void *bar0 = device->bars[0].vaddr;
+
+ dsa->gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
+ dsa->wq_cap.bits = readq(bar0 + IDXD_WQCAP_OFFSET);
+ dsa->group_cap.bits = readq(bar0 + IDXD_GRPCAP_OFFSET);
+ dsa->engine_cap.bits = readq(bar0 + IDXD_ENGCAP_OFFSET);
+
+ dsa->table_offsets.bits[0] = readq(bar0 + IDXD_TABLE_OFFSET);
+ dsa->table_offsets.bits[1] = readq(bar0 + IDXD_TABLE_OFFSET + 8);
+
+ dsa->wqcfg_table = bar0 + dsa->table_offsets.wqcfg * IDXD_TABLE_MULT;
+ dsa->grpcfg_table = bar0 + dsa->table_offsets.grpcfg * IDXD_TABLE_MULT;
+
+ dsa->max_batches = 1U << (dsa->wq_cap.total_wq_size + IDXD_WQCFG_MIN);
+ dsa->max_batches = min(dsa->max_batches, ARRAY_SIZE(dsa->batch));
+
+ dsa->max_copies_per_batch = 1UL << dsa->gen_cap.max_batch_shift;
+ dsa->max_copies_per_batch = min(dsa->max_copies_per_batch, ARRAY_SIZE(dsa->copy));
+}
+
+static void dsa_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ VFIO_ASSERT_GE(device->driver.region.size, sizeof(*dsa));
+
+ vfio_pci_config_writew(device, PCI_COMMAND,
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+
+ dsa_command(device, IDXD_CMD_RESET_DEVICE);
+
+ dsa_register_cache_init(device);
+ dsa_wq_init(device);
+ dsa_group_init(device);
+
+ dsa_command(device, IDXD_CMD_ENABLE_DEVICE);
+ dsa_command(device, IDXD_CMD_ENABLE_WQ);
+
+ vfio_pci_msix_enable(device, MSIX_VECTOR, 1);
+
+ device->driver.max_memcpy_count =
+ dsa->max_batches * dsa->max_copies_per_batch;
+ device->driver.max_memcpy_size = 1UL << dsa->gen_cap.max_xfer_shift;
+ device->driver.msi = MSIX_VECTOR;
+}
+
+static void dsa_remove(struct vfio_pci_device *device)
+{
+ dsa_command(device, IDXD_CMD_RESET_DEVICE);
+ vfio_pci_msix_disable(device);
+}
+
+static int dsa_completion_wait(struct vfio_pci_device *device,
+ struct dsa_completion_record *completion)
+{
+ u8 status;
+
+ for (;;) {
+ dsa_check_sw_err(device);
+
+ status = READ_ONCE(completion->status);
+ if (status)
+ break;
+
+ usleep(1000);
+ }
+
+ if (status == DSA_COMP_SUCCESS)
+ return 0;
+
+ printf("Error detected during memcpy operation: 0x%x\n", status);
+ return -1;
+}
+
+static void dsa_copy_desc_init(struct vfio_pci_device *device,
+ struct dsa_hw_desc *desc,
+ iova_t src, iova_t dst, u64 size,
+ bool interrupt)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ u16 flags;
+
+ flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR;
+
+ if (interrupt)
+ flags |= IDXD_OP_FLAG_RCI;
+
+ *desc = (struct dsa_hw_desc) {
+ .opcode = DSA_OPCODE_MEMMOVE,
+ .flags = flags,
+ .priv = 1,
+ .src_addr = src,
+ .dst_addr = dst,
+ .xfer_size = size,
+ .completion_addr = to_iova(device, &dsa->copy_completion),
+ .int_handle = interrupt ? MSIX_VECTOR : 0,
+ };
+}
+
+static void dsa_batch_desc_init(struct vfio_pci_device *device,
+ struct dsa_hw_desc *desc,
+ u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ *desc = (struct dsa_hw_desc) {
+ .opcode = DSA_OPCODE_BATCH,
+ .flags = IDXD_OP_FLAG_CRAV,
+ .priv = 1,
+ .completion_addr = to_iova(device, &dsa->batch_completion),
+ .desc_list_addr = to_iova(device, &dsa->copy[0]),
+ .desc_count = count,
+ };
+}
+
+static void dsa_desc_write(struct vfio_pci_device *device, struct dsa_hw_desc *desc)
+{
+ /*
+ * Write the contents (not the address) of the 64-byte descriptor into
+ * the device's BAR2 work queue portal. iosubmit_cmds512() issues one
+ * MOVDIR64B store per descriptor.
+ */
+ iosubmit_cmds512(device->bars[2].vaddr, desc, 1);
+}
+
+static void dsa_memcpy_one(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, bool interrupt)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ memset(&dsa->copy_completion, 0, sizeof(dsa->copy_completion));
+
+ dsa_copy_desc_init(device, &dsa->copy[0], src, dst, size, interrupt);
+ dsa_desc_write(device, &dsa->copy[0]);
+}
+
+static void dsa_memcpy_batch(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ int i;
+
+ memset(&dsa->batch_completion, 0, sizeof(dsa->batch_completion));
+
+ for (i = 0; i < ARRAY_SIZE(dsa->copy); i++) {
+ struct dsa_hw_desc *copy_desc = &dsa->copy[i];
+
+ dsa_copy_desc_init(device, copy_desc, src, dst, size, false);
+
+ /* Don't request completions for individual copies. */
+ copy_desc->flags &= ~IDXD_OP_FLAG_RCR;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dsa->batch) && count; i++) {
+ struct dsa_hw_desc *batch_desc = &dsa->batch[i];
+ int nr_copies;
+
+ nr_copies = min(count, dsa->max_copies_per_batch);
+ count -= nr_copies;
+
+ /*
+ * Batches must have at least 2 copies, so handle the case where
+ * there is exactly 1 copy left by doing one less copy in this
+ * batch and then 2 in the next.
+ */
+ if (count == 1) {
+ nr_copies--;
+ count++;
+ }
+
+ dsa_batch_desc_init(device, batch_desc, nr_copies);
+
+ /* Request a completion for the last batch. */
+ if (!count)
+ batch_desc->flags |= IDXD_OP_FLAG_RCR;
+
+ dsa_desc_write(device, batch_desc);
+ }
+
+ VFIO_ASSERT_EQ(count, 0, "Failed to start %lu copies.\n", count);
+}
+
+static void dsa_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ /* DSA devices require at least 2 copies per batch. */
+ if (count == 1)
+ dsa_memcpy_one(device, src, dst, size, false);
+ else
+ dsa_memcpy_batch(device, src, dst, size, count);
+
+ dsa->memcpy_count = count;
+}
+
+static int dsa_memcpy_wait(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ int r;
+
+ if (dsa->memcpy_count == 1)
+ r = dsa_completion_wait(device, &dsa->copy_completion);
+ else
+ r = dsa_completion_wait(device, &dsa->batch_completion);
+
+ dsa->memcpy_count = 0;
+
+ return r;
+}
+
+static void dsa_send_msi(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ dsa_memcpy_one(device,
+ to_iova(device, &dsa->send_msi_src),
+ to_iova(device, &dsa->send_msi_dst),
+ sizeof(dsa->send_msi_src), true);
+
+ VFIO_ASSERT_EQ(dsa_completion_wait(device, &dsa->copy_completion), 0);
+}
+
+const struct vfio_pci_driver_ops dsa_ops = {
+ .name = "dsa",
+ .probe = dsa_probe,
+ .init = dsa_init,
+ .remove = dsa_remove,
+ .memcpy_start = dsa_memcpy_start,
+ .memcpy_wait = dsa_memcpy_wait,
+ .send_msi = dsa_send_msi,
+};
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h
new file mode 120000
index 000000000000..bde657c3c2af
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/idxd/registers.h
\ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h
new file mode 120000
index 000000000000..8ab52ddd4458
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/ioat/hw.h
\ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
new file mode 100644
index 000000000000..c3b91d9b1f59
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <unistd.h>
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+
+#include <vfio_util.h>
+
+#include "hw.h"
+#include "registers.h"
+
+#define IOAT_DMACOUNT_MAX UINT16_MAX
+
+struct ioat_state {
+ /* Single descriptor used to issue DMA memcpy operations */
+ struct ioat_dma_descriptor desc;
+
+ /* Copy buffers used by ioat_send_msi() to generate an interrupt. */
+ u64 send_msi_src;
+ u64 send_msi_dst;
+};
+
+static inline struct ioat_state *to_ioat_state(struct vfio_pci_device *device)
+{
+ return device->driver.region.vaddr;
+}
+
+static inline void *ioat_channel_registers(struct vfio_pci_device *device)
+{
+ return device->bars[0].vaddr + IOAT_CHANNEL_MMIO_SIZE;
+}
+
+static int ioat_probe(struct vfio_pci_device *device)
+{
+ u8 version;
+ int r;
+
+ if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_IOAT_SKX))
+ return -EINVAL;
+
+ VFIO_ASSERT_NOT_NULL(device->bars[0].vaddr);
+
+ version = readb(device->bars[0].vaddr + IOAT_VER_OFFSET);
+ switch (version) {
+ case IOAT_VER_3_2:
+ case IOAT_VER_3_3:
+ r = 0;
+ break;
+ default:
+ printf("ioat: Unsupported version: 0x%x\n", version);
+ r = -EINVAL;
+ }
+ return r;
+}
+
+static u64 ioat_channel_status(void *bar)
+{
+ return readq(bar + IOAT_CHANSTS_OFFSET) & IOAT_CHANSTS_STATUS;
+}
+
+static void ioat_clear_errors(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u32 errors;
+
+ errors = vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET);
+ vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors);
+
+ errors = vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+ vfio_pci_config_writel(device, IOAT_PCI_DMAUNCERRSTS_OFFSET, errors);
+
+ errors = readl(registers + IOAT_CHANERR_OFFSET);
+ writel(errors, registers + IOAT_CHANERR_OFFSET);
+}
+
+static void ioat_reset(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
+ u8 chancmd;
+
+ ioat_clear_errors(device);
+
+ writeb(IOAT_CHANCMD_RESET, registers + IOAT2_CHANCMD_OFFSET);
+
+ for (;;) {
+ chancmd = readb(registers + IOAT2_CHANCMD_OFFSET);
+ if (!(chancmd & IOAT_CHANCMD_RESET))
+ break;
+
+ VFIO_ASSERT_GT(--attempts, 0);
+ usleep(sleep_ms * 1000);
+ }
+
+ VFIO_ASSERT_EQ(ioat_channel_status(registers), IOAT_CHANSTS_HALTED);
+}
+
+static void ioat_init(struct vfio_pci_device *device)
+{
+ struct ioat_state *ioat = to_ioat_state(device);
+ u8 intrctrl;
+
+ VFIO_ASSERT_GE(device->driver.region.size, sizeof(*ioat));
+
+ vfio_pci_config_writew(device, PCI_COMMAND,
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+
+ ioat_reset(device);
+
+ /* Enable the use of MSI-X interrupts for channel interrupts. */
+ intrctrl = IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ writeb(intrctrl, device->bars[0].vaddr + IOAT_INTRCTRL_OFFSET);
+
+ vfio_pci_msix_enable(device, 0, device->msix_info.count);
+
+ device->driver.msi = 0;
+ device->driver.max_memcpy_size =
+ 1UL << readb(device->bars[0].vaddr + IOAT_XFERCAP_OFFSET);
+ device->driver.max_memcpy_count = IOAT_DMACOUNT_MAX;
+}
+
+static void ioat_remove(struct vfio_pci_device *device)
+{
+ ioat_reset(device);
+ vfio_pci_msix_disable(device);
+}
+
+static void ioat_handle_error(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+
+ printf("Error detected during memcpy operation!\n"
+ " CHANERR: 0x%x\n"
+ " CHANERR_INT: 0x%x\n"
+ " DMAUNCERRSTS: 0x%x\n",
+ readl(registers + IOAT_CHANERR_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET));
+
+ ioat_reset(device);
+}
+
+static int ioat_memcpy_wait(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u64 status;
+ int r = 0;
+
+ /* Wait until all operations complete. */
+ for (;;) {
+ status = ioat_channel_status(registers);
+ if (status == IOAT_CHANSTS_DONE)
+ break;
+
+ if (status == IOAT_CHANSTS_HALTED) {
+ ioat_handle_error(device);
+ return -1;
+ }
+ }
+
+ /* Put the channel into the SUSPENDED state. */
+ writeb(IOAT_CHANCMD_SUSPEND, registers + IOAT2_CHANCMD_OFFSET);
+ for (;;) {
+ status = ioat_channel_status(registers);
+ if (status == IOAT_CHANSTS_SUSPENDED)
+ break;
+ }
+
+ return r;
+}
+
+static void __ioat_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u16 count, bool interrupt)
+{
+ void *registers = ioat_channel_registers(device);
+ struct ioat_state *ioat = to_ioat_state(device);
+ u64 desc_iova;
+ u16 chanctrl;
+
+ desc_iova = to_iova(device, &ioat->desc);
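+ /*
+ * A single self-referencing descriptor is used: @next points back at
+ * the descriptor itself, so writing @count to DMACOUNT below replays
+ * the same copy @count times.
+ */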
+ ioat->desc = (struct ioat_dma_descriptor) {
+ .ctl_f.op = IOAT_OP_COPY,
+ .ctl_f.int_en = interrupt,
+ .src_addr = src,
+ .dst_addr = dst,
+ .size = size,
+ .next = desc_iova,
+ };
+
+ /* Tell the device the address of the descriptor. */
+ writeq(desc_iova, registers + IOAT2_CHAINADDR_OFFSET);
+
+ /* (Re)Enable the channel interrupt and abort on any errors */
+ chanctrl = IOAT_CHANCTRL_INT_REARM | IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
+ writew(chanctrl, registers + IOAT_CHANCTRL_OFFSET);
+
+ /* Kick off @count DMA copy operation(s). */
+ writew(count, registers + IOAT_CHAN_DMACOUNT_OFFSET);
+}
+
+static void ioat_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count)
+{
+ __ioat_memcpy_start(device, src, dst, size, count, false);
+}
+
+static void ioat_send_msi(struct vfio_pci_device *device)
+{
+ struct ioat_state *ioat = to_ioat_state(device);
+
+ __ioat_memcpy_start(device,
+ to_iova(device, &ioat->send_msi_src),
+ to_iova(device, &ioat->send_msi_dst),
+ sizeof(ioat->send_msi_src), 1, true);
+
+ VFIO_ASSERT_EQ(ioat_memcpy_wait(device), 0);
+}
+
+const struct vfio_pci_driver_ops ioat_ops = {
+ .name = "ioat",
+ .probe = ioat_probe,
+ .init = ioat_init,
+ .remove = ioat_remove,
+ .memcpy_start = ioat_memcpy_start,
+ .memcpy_wait = ioat_memcpy_wait,
+ .send_msi = ioat_send_msi,
+};
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h
new file mode 120000
index 000000000000..0b809cfd8fe6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/ioat/registers.h
\ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
new file mode 100644
index 000000000000..ed31606e01b7
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H
+
+#include <fcntl.h>
+#include <string.h>
+#include <linux/vfio.h>
+#include <linux/list.h>
+#include <linux/pci_regs.h>
+
+#include "../../../kselftest.h"
+
+#define VFIO_LOG_AND_EXIT(...) do { \
+ fprintf(stderr, " " __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ exit(KSFT_FAIL); \
+} while (0)
+
+#define VFIO_ASSERT_OP(_lhs, _rhs, _op, ...) do { \
+ typeof(_lhs) __lhs = (_lhs); \
+ typeof(_rhs) __rhs = (_rhs); \
+ \
+ if (__lhs _op __rhs) \
+ break; \
+ \
+ fprintf(stderr, "%s:%u: Assertion Failure\n\n", __FILE__, __LINE__); \
+ fprintf(stderr, " Expression: " #_lhs " " #_op " " #_rhs "\n"); \
+ fprintf(stderr, " Observed: %#lx %s %#lx\n", \
+ (u64)__lhs, #_op, (u64)__rhs); \
+ fprintf(stderr, " [errno: %d - %s]\n", errno, strerror(errno)); \
+ VFIO_LOG_AND_EXIT(__VA_ARGS__); \
+} while (0)
+
+#define VFIO_ASSERT_EQ(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, ==, ##__VA_ARGS__)
+#define VFIO_ASSERT_NE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, !=, ##__VA_ARGS__)
+#define VFIO_ASSERT_LT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <, ##__VA_ARGS__)
+#define VFIO_ASSERT_LE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <=, ##__VA_ARGS__)
+#define VFIO_ASSERT_GT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >, ##__VA_ARGS__)
+#define VFIO_ASSERT_GE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >=, ##__VA_ARGS__)
+#define VFIO_ASSERT_TRUE(_a, ...) VFIO_ASSERT_NE(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_FALSE(_a, ...) VFIO_ASSERT_EQ(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_NULL(_a, ...) VFIO_ASSERT_EQ(NULL, _a, ##__VA_ARGS__)
+#define VFIO_ASSERT_NOT_NULL(_a, ...) VFIO_ASSERT_NE(NULL, _a, ##__VA_ARGS__)
+
+#define VFIO_FAIL(_fmt, ...) do { \
+ fprintf(stderr, "%s:%u: FAIL\n\n", __FILE__, __LINE__); \
+ VFIO_LOG_AND_EXIT(_fmt, ##__VA_ARGS__); \
+} while (0)
+
+struct vfio_iommu_mode {
+ const char *name;
+ const char *container_path;
+ unsigned long iommu_type;
+};
+
+/*
+ * Generator for VFIO selftests fixture variants that replicate across all
+ * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE(),
+ * which should then use FIXTURE_VARIANT_ADD() to create the variant.
+ */
+#define FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(...) \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__)
+
+struct vfio_pci_bar {
+ struct vfio_region_info info;
+ void *vaddr;
+};
+
+typedef u64 iova_t;
+
+#define INVALID_IOVA UINT64_MAX
+
+struct vfio_dma_region {
+ struct list_head link;
+ void *vaddr;
+ iova_t iova;
+ u64 size;
+};
+
+struct vfio_pci_device;
+
+struct vfio_pci_driver_ops {
+ const char *name;
+
+ /**
+ * @probe() - Check if the driver supports the given device.
+ *
+ * Return: 0 on success, non-0 on failure.
+ */
+ int (*probe)(struct vfio_pci_device *device);
+
+ /**
+ * @init() - Initialize the driver for @device.
+ *
+ * Must be called after device->driver.region has been initialized.
+ */
+ void (*init)(struct vfio_pci_device *device);
+
+ /**
+ * @remove() - Deinitialize the driver for @device.
+ */
+ void (*remove)(struct vfio_pci_device *device);
+
+ /**
+ * @memcpy_start() - Kick off @count repeated memcpy operations from
+ * [@src, @src + @size) to [@dst, @dst + @size).
+ *
+ * Guarantees:
+ * - The device will attempt DMA reads on [src, src + size).
+ * - The device will attempt DMA writes on [dst, dst + size).
+ * - The device will not generate any interrupts.
+ *
+ * memcpy_start() returns immediately; it does not wait for the
+ * copies to complete.
+ */
+ void (*memcpy_start)(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count);
+
+ /**
+ * @memcpy_wait() - Wait until the memcpy operations started by
+ * memcpy_start() have finished.
+ *
+ * Guarantees:
+ * - All in-flight DMAs initiated by memcpy_start() are fully complete
+ * before memcpy_wait() returns.
+ *
+ * Returns non-0 if the driver detects that an error occurred during the
+ * memcpy, 0 otherwise.
+ */
+ int (*memcpy_wait)(struct vfio_pci_device *device);
+
+ /**
+ * @send_msi() - Make the device send the MSI device->driver.msi.
+ *
+ * Guarantees:
+ * - The device will send the MSI once.
+ */
+ void (*send_msi)(struct vfio_pci_device *device);
+};
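+
+/*
+ * Expected call order (enforced by asserts in lib/vfio_pci_driver.c):
+ * probe() once during device setup, then init(), then any number of
+ * memcpy_start()/memcpy_wait() pairs and send_msi() calls, then remove().
+ */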
+
+struct vfio_pci_driver {
+ const struct vfio_pci_driver_ops *ops;
+ bool initialized;
+ bool memcpy_in_progress;
+
+ /* Region to be used by the driver (e.g. for in-memory descriptors) */
+ struct vfio_dma_region region;
+
+ /* The maximum size that can be passed to memcpy_start(). */
+ u64 max_memcpy_size;
+
+ /* The maximum count that can be passed to memcpy_start(). */
+ u64 max_memcpy_count;
+
+ /* The MSI vector the device will signal in ops->send_msi(). */
+ int msi;
+};
+
+struct vfio_pci_device {
+ int fd;
+
+ const struct vfio_iommu_mode *iommu_mode;
+ int group_fd;
+ int container_fd;
+
+ int iommufd;
+ u32 ioas_id;
+
+ struct vfio_device_info info;
+ struct vfio_region_info config_space;
+ struct vfio_pci_bar bars[PCI_STD_NUM_BARS];
+
+ struct vfio_irq_info msi_info;
+ struct vfio_irq_info msix_info;
+
+ struct list_head dma_regions;
+
+ /* eventfds for MSI and MSI-X interrupts */
+ int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1];
+
+ struct vfio_pci_driver driver;
+};
+
+/*
+ * Return the BDF string of the device that the test should use.
+ *
+ * If a BDF string is provided by the user on the command line (as the last
+ * element of argv[]), then this function will return that and decrement argc
+ * by 1.
+ *
+ * Otherwise this function will attempt to use the environment variable
+ * $VFIO_SELFTESTS_BDF.
+ *
+ * If the BDF cannot be determined, the test will exit with KSFT_SKIP.
+ */
+const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
+const char *vfio_pci_get_cdev_path(const char *bdf);
+
+extern const char *default_iommu_mode;
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode);
+void vfio_pci_device_cleanup(struct vfio_pci_device *device);
+void vfio_pci_device_reset(struct vfio_pci_device *device);
+
+void vfio_pci_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region);
+void vfio_pci_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region);
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data);
+
+#define vfio_pci_config_read(_device, _offset, _type) ({ \
+ _type __data; \
+ vfio_pci_config_access((_device), false, _offset, sizeof(__data), &__data); \
+ __data; \
+})
+
+#define vfio_pci_config_readb(_d, _o) vfio_pci_config_read(_d, _o, u8)
+#define vfio_pci_config_readw(_d, _o) vfio_pci_config_read(_d, _o, u16)
+#define vfio_pci_config_readl(_d, _o) vfio_pci_config_read(_d, _o, u32)
+
+#define vfio_pci_config_write(_device, _offset, _value, _type) do { \
+ _type __data = (_value); \
+ vfio_pci_config_access((_device), true, _offset, sizeof(_type), &__data); \
+} while (0)
+
+#define vfio_pci_config_writeb(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u8)
+#define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16)
+#define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32)
+
+void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index,
+ u32 vector, int count);
+void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index);
+void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector);
+
+static inline void fcntl_set_nonblock(int fd)
+{
+ int r;
+
+ r = fcntl(fd, F_GETFL, 0);
+ VFIO_ASSERT_NE(r, -1, "F_GETFL failed for fd %d\n", fd);
+
+ r = fcntl(fd, F_SETFL, r | O_NONBLOCK);
+ VFIO_ASSERT_NE(r, -1, "F_SETFL O_NONBLOCK failed for fd %d\n", fd);
+}
+
+static inline void vfio_pci_msi_enable(struct vfio_pci_device *device,
+ u32 vector, int count)
+{
+ vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, vector, count);
+}
+
+static inline void vfio_pci_msi_disable(struct vfio_pci_device *device)
+{
+ vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX);
+}
+
+static inline void vfio_pci_msix_enable(struct vfio_pci_device *device,
+ u32 vector, int count)
+{
+ vfio_pci_irq_enable(device, VFIO_PCI_MSIX_IRQ_INDEX, vector, count);
+}
+
+static inline void vfio_pci_msix_disable(struct vfio_pci_device *device)
+{
+ vfio_pci_irq_disable(device, VFIO_PCI_MSIX_IRQ_INDEX);
+}
+
+iova_t __to_iova(struct vfio_pci_device *device, void *vaddr);
+iova_t to_iova(struct vfio_pci_device *device, void *vaddr);
+
+static inline bool vfio_pci_device_match(struct vfio_pci_device *device,
+ u16 vendor_id, u16 device_id)
+{
+ return (vendor_id == vfio_pci_config_readw(device, PCI_VENDOR_ID)) &&
+ (device_id == vfio_pci_config_readw(device, PCI_DEVICE_ID));
+}
+
+void vfio_pci_driver_probe(struct vfio_pci_device *device);
+void vfio_pci_driver_init(struct vfio_pci_device *device);
+void vfio_pci_driver_remove(struct vfio_pci_device *device);
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size);
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count);
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device);
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H */
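For context, tests consume this header roughly as in the sketch below. This is illustrative only and not part of the patch: it assumes the kselftest harness (kselftest_harness.h) FIXTURE macros, and the fixture name vfio_pci_test, the device_bdf global, and the reset test are hypothetical.

/* Assumes #include <vfio_util.h> and "kselftest_harness.h". */
static const char *device_bdf;

FIXTURE(vfio_pci_test) {
        struct vfio_pci_device *device;
};

FIXTURE_VARIANT(vfio_pci_test) {
        const char *iommu_mode;
};

/* One harness variant per IOMMU mode, as required by the generator above. */
#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode)             \
FIXTURE_VARIANT_ADD(vfio_pci_test, _iommu_mode) {               \
        .iommu_mode = #_iommu_mode,                             \
}

FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();

FIXTURE_SETUP(vfio_pci_test)
{
        self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
}

FIXTURE_TEARDOWN(vfio_pci_test)
{
        vfio_pci_device_cleanup(self->device);
}

TEST_F(vfio_pci_test, reset)
{
        vfio_pci_device_reset(self->device);
}

int main(int argc, char *argv[])
{
        device_bdf = vfio_selftests_get_bdf(&argc, argv);
        return test_harness_run(argc, argv);
}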
diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk b/tools/testing/selftests/vfio/lib/libvfio.mk
new file mode 100644
index 000000000000..5d11c3a89a28
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/libvfio.mk
@@ -0,0 +1,24 @@
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
+
+VFIO_DIR := $(selfdir)/vfio
+
+LIBVFIO_C := lib/vfio_pci_device.c
+LIBVFIO_C += lib/vfio_pci_driver.c
+
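+# The DSA and ioat drivers are x86-only (note the __x86_64__ guards in
+# lib/vfio_pci_driver.c), so only build them when targeting x86.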
+ifeq ($(ARCH:x86_64=x86),x86)
+LIBVFIO_C += lib/drivers/ioat/ioat.c
+LIBVFIO_C += lib/drivers/dsa/dsa.c
+endif
+
+LIBVFIO_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBVFIO_C))
+
+LIBVFIO_O_DIRS := $(shell dirname $(LIBVFIO_O) | uniq)
+$(shell mkdir -p $(LIBVFIO_O_DIRS))
+
+CFLAGS += -I$(VFIO_DIR)/lib/include
+
+$(LIBVFIO_O): $(OUTPUT)/%.o : $(VFIO_DIR)/%.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBVFIO_O)
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
new file mode 100644
index 000000000000..0921b2451ba5
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <uapi/linux/types.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "../../../kselftest.h"
+#include <vfio_util.h>
+
+#define PCI_SYSFS_PATH "/sys/bus/pci/devices"
+
+#define ioctl_assert(_fd, _op, _arg) do { \
+ void *__arg = (_arg); \
+ int __ret = ioctl((_fd), (_op), (__arg)); \
+ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
+} while (0)
+
+iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
+{
+ struct vfio_dma_region *region;
+
+ list_for_each_entry(region, &device->dma_regions, link) {
+ if (vaddr < region->vaddr)
+ continue;
+
+ if (vaddr >= region->vaddr + region->size)
+ continue;
+
+ return region->iova + (vaddr - region->vaddr);
+ }
+
+ return INVALID_IOVA;
+}
+
+iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
+{
+ iova_t iova;
+
+ iova = __to_iova(device, vaddr);
+ VFIO_ASSERT_NE(iova, INVALID_IOVA, "%p is not mapped into device.\n", vaddr);
+
+ return iova;
+}
+
+static void vfio_pci_irq_set(struct vfio_pci_device *device,
+ u32 index, u32 vector, u32 count, int *fds)
+{
+ u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {};
+ struct vfio_irq_set *irq = (void *)&buf;
+ int *irq_fds = (void *)&irq->data;
+
+ irq->argsz = sizeof(buf);
+ irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq->index = index;
+ irq->start = vector;
+ irq->count = count;
+
+ if (count) {
+ irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD;
+ memcpy(irq_fds, fds, sizeof(int) * count);
+ } else {
+ irq->flags |= VFIO_IRQ_SET_DATA_NONE;
+ }
+
+ ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
+}
+
+void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
+{
+ struct vfio_irq_set irq = {
+ .argsz = sizeof(irq),
+ .flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
+ .index = index,
+ .start = vector,
+ .count = 1,
+ };
+
+ ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
+}
+
+static void check_supported_irq_index(u32 index)
+{
+ /* The VFIO selftests only support MSI and MSI-X for now. */
+ VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
+ index == VFIO_PCI_MSIX_IRQ_INDEX,
+ "Unsupported IRQ index: %u\n", index);
+}
+
+void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
+ int count)
+{
+ int i;
+
+ check_supported_irq_index(index);
+
+ for (i = vector; i < vector + count; i++) {
+ VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
+ device->msi_eventfds[i] = eventfd(0, 0);
+ VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
+ }
+
+ vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
+}
+
+void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
+{
+ int i;
+
+ check_supported_irq_index(index);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
+ if (device->msi_eventfds[i] < 0)
+ continue;
+
+ VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
+ device->msi_eventfds[i] = -1;
+ }
+
+ vfio_pci_irq_set(device, index, 0, 0, NULL);
+}
+
+static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
+ struct vfio_irq_info *irq_info)
+{
+ irq_info->argsz = sizeof(*irq_info);
+ irq_info->index = index;
+
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
+}
+
+static void vfio_iommu_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ struct vfio_iommu_type1_dma_map args = {
+ .argsz = sizeof(args),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .vaddr = (u64)region->vaddr,
+ .iova = region->iova,
+ .size = region->size,
+ };
+
+ ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args);
+}
+
+static void iommufd_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ struct iommu_ioas_map args = {
+ .size = sizeof(args),
+ .flags = IOMMU_IOAS_MAP_READABLE |
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_FIXED_IOVA,
+ .user_va = (u64)region->vaddr,
+ .iova = region->iova,
+ .length = region->size,
+ .ioas_id = device->ioas_id,
+ };
+
+ ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args);
+}
+
+void vfio_pci_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ if (device->iommufd)
+ iommufd_dma_map(device, region);
+ else
+ vfio_iommu_dma_map(device, region);
+
+ list_add(&region->link, &device->dma_regions);
+}
+
+static void vfio_iommu_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ struct vfio_iommu_type1_dma_unmap args = {
+ .argsz = sizeof(args),
+ .iova = region->iova,
+ .size = region->size,
+ };
+
+ ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args);
+}
+
+static void iommufd_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ struct iommu_ioas_unmap args = {
+ .size = sizeof(args),
+ .iova = region->iova,
+ .length = region->size,
+ .ioas_id = device->ioas_id,
+ };
+
+ ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args);
+}
+
+void vfio_pci_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ if (device->iommufd)
+ iommufd_dma_unmap(device, region);
+ else
+ vfio_iommu_dma_unmap(device, region);
+
+ list_del(&region->link);
+}
+
+static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
+ struct vfio_region_info *info)
+{
+ memset(info, 0, sizeof(*info));
+
+ info->argsz = sizeof(*info);
+ info->index = index;
+
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
+}
+
+static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
+{
+ struct vfio_pci_bar *bar = &device->bars[index];
+ int prot = 0;
+
+ VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
+ VFIO_ASSERT_NULL(bar->vaddr);
+ VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
+
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
+ prot |= PROT_READ;
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
+ prot |= PROT_WRITE;
+
+ bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
+ device->fd, bar->info.offset);
+ VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
+}
+
+static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
+{
+ struct vfio_pci_bar *bar = &device->bars[index];
+
+ VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
+ VFIO_ASSERT_NOT_NULL(bar->vaddr);
+
+ VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
+ bar->vaddr = NULL;
+}
+
+static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
+{
+ int i;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (device->bars[i].vaddr)
+ vfio_pci_bar_unmap(device, i);
+ }
+}
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data)
+{
+ struct vfio_region_info *config_space = &device->config_space;
+ int ret;
+
+ if (write)
+ ret = pwrite(device->fd, data, size, config_space->offset + config);
+ else
+ ret = pread(device->fd, data, size, config_space->offset + config);
+
+ VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
+ write ? "write to" : "read from", config);
+}
+
+void vfio_pci_device_reset(struct vfio_pci_device *device)
+{
+ ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
+}
+
+static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
+{
+ char dev_iommu_group_path[PATH_MAX] = {0};
+ char sysfs_path[PATH_MAX] = {0};
+ unsigned int group;
+ int ret;
+
+ snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);
+
+ ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path));
+ VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);
+
+ ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
+ VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);
+
+ return group;
+}
+
+static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status),
+ };
+ char group_path[32];
+ int group;
+
+ group = vfio_pci_get_group_from_dev(bdf);
+ snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);
+
+ device->group_fd = open(group_path, O_RDWR);
+ VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);
+
+ ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+ VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);
+
+ ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd);
+}
+
+static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ unsigned long iommu_type = device->iommu_mode->iommu_type;
+ const char *path = device->iommu_mode->container_path;
+ int version;
+ int ret;
+
+ device->container_fd = open(path, O_RDWR);
+ VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path);
+
+ version = ioctl(device->container_fd, VFIO_GET_API_VERSION);
+ VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
+
+ vfio_pci_group_setup(device, bdf);
+
+ ret = ioctl(device->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
+ VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);
+
+ ioctl_assert(device->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);
+
+ device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
+ VFIO_ASSERT_GE(device->fd, 0);
+}
+
+static void vfio_pci_device_setup(struct vfio_pci_device *device)
+{
+ int i;
+
+ device->info.argsz = sizeof(device->info);
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);
+
+ vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);
+
+ /* Sanity check that VFIO does not advertise mmap for config space. */
+ VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
+ "PCI config space should not support mmap()\n");
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ struct vfio_pci_bar *bar = device->bars + i;
+
+ vfio_pci_region_get(device, i, &bar->info);
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
+ vfio_pci_bar_map(device, i);
+ }
+
+ vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
+ vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
+ device->msi_eventfds[i] = -1;
+}
+
+const char *vfio_pci_get_cdev_path(const char *bdf)
+{
+ char dir_path[PATH_MAX];
+ struct dirent *entry;
+ char *cdev_path;
+ DIR *dir;
+
+ cdev_path = calloc(PATH_MAX, 1);
+ VFIO_ASSERT_NOT_NULL(cdev_path);
+
+ snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);
+
+ dir = opendir(dir_path);
+ VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);
+
+ while ((entry = readdir(dir)) != NULL) {
+ /* Find the file that starts with "vfio" */
+ if (strncmp("vfio", entry->d_name, 4))
+ continue;
+
+ snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
+ break;
+ }
+
+ VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
+ VFIO_ASSERT_EQ(closedir(dir), 0);
+
+ return cdev_path;
+}
+
+/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
+static const struct vfio_iommu_mode iommu_modes[] = {
+ {
+ .name = "vfio_type1_iommu",
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = "vfio_type1v2_iommu",
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = "iommufd_compat_type1",
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = "iommufd_compat_type1v2",
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = "iommufd",
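+ /* No container_path: vfio_pci_device_init() takes the native iommufd path. */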
+ },
+};
+
+const char *default_iommu_mode = "iommufd";
+
+static const struct vfio_iommu_mode *lookup_iommu_mode(const char *iommu_mode)
+{
+ int i;
+
+ if (!iommu_mode)
+ iommu_mode = default_iommu_mode;
+
+ for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
+ if (strcmp(iommu_mode, iommu_modes[i].name))
+ continue;
+
+ return &iommu_modes[i];
+ }
+
+ VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
+}
+
+static void vfio_device_bind_iommufd(int device_fd, int iommufd)
+{
+ struct vfio_device_bind_iommufd args = {
+ .argsz = sizeof(args),
+ .iommufd = iommufd,
+ };
+
+ ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
+}
+
+static u32 iommufd_ioas_alloc(int iommufd)
+{
+ struct iommu_ioas_alloc args = {
+ .size = sizeof(args),
+ };
+
+ ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
+ return args.out_ioas_id;
+}
+
+static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
+{
+ struct vfio_device_attach_iommufd_pt args = {
+ .argsz = sizeof(args),
+ .pt_id = pt_id,
+ };
+
+ ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
+}
+
+static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ const char *cdev_path = vfio_pci_get_cdev_path(bdf);
+
+ device->fd = open(cdev_path, O_RDWR);
+ VFIO_ASSERT_GE(device->fd, 0);
+ free((void *)cdev_path);
+
+ /*
+ * Require device->iommufd to be > 0 so that a simple non-0 check can
+ * be used to determine whether iommufd is enabled. In practice open()
+ * will never return 0 unless stdin is closed.
+ */
+ device->iommufd = open("/dev/iommu", O_RDWR);
+ VFIO_ASSERT_GT(device->iommufd, 0);
+
+ vfio_device_bind_iommufd(device->fd, device->iommufd);
+ device->ioas_id = iommufd_ioas_alloc(device->iommufd);
+ vfio_device_attach_iommufd_pt(device->fd, device->ioas_id);
+}
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode)
+{
+ struct vfio_pci_device *device;
+
+ device = calloc(1, sizeof(*device));
+ VFIO_ASSERT_NOT_NULL(device);
+
+ INIT_LIST_HEAD(&device->dma_regions);
+
+ device->iommu_mode = lookup_iommu_mode(iommu_mode);
+
+ if (device->iommu_mode->container_path)
+ vfio_pci_container_setup(device, bdf);
+ else
+ vfio_pci_iommufd_setup(device, bdf);
+
+ vfio_pci_device_setup(device);
+ vfio_pci_driver_probe(device);
+
+ return device;
+}
+
+void vfio_pci_device_cleanup(struct vfio_pci_device *device)
+{
+ int i;
+
+ if (device->driver.initialized)
+ vfio_pci_driver_remove(device);
+
+ vfio_pci_bar_unmap_all(device);
+
+ VFIO_ASSERT_EQ(close(device->fd), 0);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
+ if (device->msi_eventfds[i] < 0)
+ continue;
+
+ VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
+ }
+
+ if (device->iommufd) {
+ VFIO_ASSERT_EQ(close(device->iommufd), 0);
+ } else {
+ VFIO_ASSERT_EQ(close(device->group_fd), 0);
+ VFIO_ASSERT_EQ(close(device->container_fd), 0);
+ }
+
+ free(device);
+}
+
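+/* A BDF string has the form "segment:bus:device.function", e.g. "0000:00:04.0". */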
+static bool is_bdf(const char *str)
+{
+ unsigned int s, b, d, f;
+ int length, count;
+
+ count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length);
+ return count == 4 && length == strlen(str);
+}
+
+const char *vfio_selftests_get_bdf(int *argc, char *argv[])
+{
+ char *bdf;
+
+ if (*argc > 1 && is_bdf(argv[*argc - 1]))
+ return argv[--(*argc)];
+
+ bdf = getenv("VFIO_SELFTESTS_BDF");
+ if (bdf) {
+ VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
+ return bdf;
+ }
+
+ fprintf(stderr, "Unable to determine which device to use, skipping test.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "To pass the device address via environment variable:\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " export VFIO_SELFTESTS_BDF=segment:bus:device.function\n");
+ fprintf(stderr, " %s [options]\n", argv[0]);
+ fprintf(stderr, "\n");
+ fprintf(stderr, "To pass the device address via argv:\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " %s [options] segment:bus:device.function\n", argv[0]);
+ fprintf(stderr, "\n");
+ exit(KSFT_SKIP);
+}
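As a usage sketch (illustrative only: the 2 MiB size and the choice to reuse the virtual address as the IOVA are arbitrary, and device is assumed to come from vfio_pci_device_init()), the DMA mapping helpers are driven like this:

struct vfio_dma_region region;

/* Back the region with anonymous memory; any mappable buffer works. */
region.vaddr = mmap(NULL, SZ_2M, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
VFIO_ASSERT_NE(region.vaddr, MAP_FAILED);
region.iova = (iova_t)region.vaddr;
region.size = SZ_2M;

vfio_pci_dma_map(device, &region);

/* The device may now DMA to [region.iova, region.iova + region.size). */
VFIO_ASSERT_EQ(to_iova(device, region.vaddr), region.iova);

vfio_pci_dma_unmap(device, &region);
VFIO_ASSERT_EQ(munmap(region.vaddr, region.size), 0);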
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
new file mode 100644
index 000000000000..e5e8723ecb41
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+
+#include "../../../kselftest.h"
+#include <vfio_util.h>
+
+#ifdef __x86_64__
+extern struct vfio_pci_driver_ops dsa_ops;
+extern struct vfio_pci_driver_ops ioat_ops;
+#endif
+
+static struct vfio_pci_driver_ops *driver_ops[] = {
+#ifdef __x86_64__
+ &dsa_ops,
+ &ioat_ops,
+#endif
+};
+
+void vfio_pci_driver_probe(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver_ops *ops;
+ int i;
+
+ VFIO_ASSERT_NULL(device->driver.ops);
+
+ for (i = 0; i < ARRAY_SIZE(driver_ops); i++) {
+ ops = driver_ops[i];
+
+ if (ops->probe(device))
+ continue;
+
+ printf("Driver found: %s\n", ops->name);
+ device->driver.ops = ops;
+ }
+}
+
+static void vfio_check_driver_op(struct vfio_pci_driver *driver, void *op,
+ const char *op_name)
+{
+ VFIO_ASSERT_NOT_NULL(driver->ops);
+ VFIO_ASSERT_NOT_NULL(op, "Driver has no %s()\n", op_name);
+ VFIO_ASSERT_EQ(driver->initialized, op != driver->ops->init);
+ VFIO_ASSERT_EQ(driver->memcpy_in_progress, op == driver->ops->memcpy_wait);
+}
+
+#define VFIO_CHECK_DRIVER_OP(_driver, _op) do { \
+ struct vfio_pci_driver *__driver = (_driver); \
+ vfio_check_driver_op(__driver, __driver->ops->_op, #_op); \
+} while (0)
+
+void vfio_pci_driver_init(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_ASSERT_NOT_NULL(driver->region.vaddr);
+ VFIO_CHECK_DRIVER_OP(driver, init);
+
+ driver->ops->init(device);
+
+ driver->initialized = true;
+
+ printf("%s: region: vaddr %p, iova 0x%lx, size 0x%lx\n",
+ driver->ops->name,
+ driver->region.vaddr,
+ driver->region.iova,
+ driver->region.size);
+
+ printf("%s: max_memcpy_size 0x%lx, max_memcpy_count 0x%lx\n",
+ driver->ops->name,
+ driver->max_memcpy_size,
+ driver->max_memcpy_count);
+}
+
+void vfio_pci_driver_remove(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_CHECK_DRIVER_OP(driver, remove);
+
+ driver->ops->remove(device);
+ driver->initialized = false;
+}
+
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_CHECK_DRIVER_OP(driver, send_msi);
+
+ driver->ops->send_msi(device);
+}
+
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_ASSERT_LE(size, driver->max_memcpy_size);
+ VFIO_ASSERT_LE(count, driver->max_memcpy_count);
+ VFIO_CHECK_DRIVER_OP(driver, memcpy_start);
+
+ driver->ops->memcpy_start(device, src, dst, size, count);
+ driver->memcpy_in_progress = true;
+}
+
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+ int r;
+
+ VFIO_CHECK_DRIVER_OP(driver, memcpy_wait);
+
+ r = driver->ops->memcpy_wait(device);
+ driver->memcpy_in_progress = false;
+
+ return r;
+}
+
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size)
+{
+ vfio_pci_driver_memcpy_start(device, src, dst, size, 1);
+
+ return vfio_pci_driver_memcpy_wait(device);
+}
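Tying the wrappers together, a sketch of the intended end-to-end flow, assuming region was already mapped with vfio_pci_dma_map() and that src_iova and dst_iova reference mapped memory outside the driver's scratch region:

u64 value;

/* Hand the driver its scratch region, then bring it up. */
device->driver.region = region;
vfio_pci_driver_init(device);

/* One DMA memcpy: memcpy_start() + memcpy_wait() under the hood. */
VFIO_ASSERT_EQ(vfio_pci_driver_memcpy(device, src_iova, dst_iova, SZ_4K), 0);

/* Trigger an MSI and consume it from the vector's eventfd. */
vfio_pci_driver_send_msi(device);
VFIO_ASSERT_EQ(read(device->msi_eventfds[device->driver.msi], &value,
                    sizeof(value)), sizeof(value));

vfio_pci_driver_remove(device);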