summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx5/mlx5_ib.h
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2022-09-01 11:20:56 -0300
committerJason Gunthorpe <jgg@nvidia.com>2022-09-27 10:15:24 -0300
commit72b2f7608a59727e7c2e5b11cff2749c2c080fac (patch)
treecdd1d28a40e6fa5611a92645c865d78c45435011 /drivers/infiniband/hw/mlx5/mlx5_ib.h
parent9af859c58d0f169ead0ed95204cdd891b0ee623a (diff)
RDMA/mlx5: Enable ATS support for MRs and umems
For mlx5 if ATS is enabled in the PCI config then the device will use ATS requests for only certain DMA operations. This has to be opted in by the SW side based on the mkey or umem settings. ATS slows down the PCI performance, so it should only be set in cases when it is needed. All of these cases revolve around optimizing PCI P2P transfers and avoiding bad cases where the bus just doesn't work. Link: https://lore.kernel.org/r/4-v1-bd147097458e+ede-umem_dmabuf_jgg@nvidia.com Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx5/mlx5_ib.h')
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h36
1 files changed, 36 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2e2ad3918385..7e2c4a378220 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1550,4 +1550,40 @@ static inline bool rt_supported(int ts_cap)
return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
}
+
+/*
+ * PCI Peer to Peer is a trainwreck. If no switch is present then things
+ * sometimes work, depending on the pci_distance_p2p logic for excluding broken
+ * root complexes. However if a switch is present in the path, then things get
+ * really ugly depending on how the switch is setup. This table assumes that the
+ * root complex is strict and is validating that all req/reps are matches
+ * perfectly - so any scenario where it sees only half the transaction is a
+ * failure.
+ *
+ * CR/RR/DT ATS RO P2P
+ * 00X X X OK
+ * 010 X X fails (request is routed to root but root never sees comp)
+ * 011 0 X fails (request is routed to root but root never sees comp)
+ * 011 1 X OK
+ * 10X X 1 OK
+ * 101 X 0 fails (completion is routed to root but root didn't see req)
+ * 110 X 0 SLOW
+ * 111 0 0 SLOW
+ * 111 1 0 fails (completion is routed to root but root didn't see req)
+ * 111 1 1 OK
+ *
+ * Unfortunately we cannot reliably know if a switch is present or what the
+ * CR/RR/DT ACS settings are, as in a VM that is all hidden. Assume that
+ * CR/RR/DT is 111 if the ATS cap is enabled and follow the last three rows.
+ *
+ * For now assume if the umem is a dma_buf then it is P2P.
+ */
+static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev,
+ struct ib_umem *umem, int access_flags)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, ats) || !umem->is_dmabuf)
+ return false;
+ return access_flags & IB_ACCESS_RELAXED_ORDERING;
+}
+
#endif /* MLX5_IB_H */