Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 156
1 file changed, 143 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 8e81a83d37d8..e22cfa7c6d32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -36,12 +36,36 @@
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
#include <drm/amdgpu_drm.h>
#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
#include <linux/pci-p2pdma.h>
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops;
+
+/**
+ * dma_buf_attach_adev - Helper to get adev of an attachment
+ *
+ * @attach: attachment
+ *
+ * Returns:
+ * A struct amdgpu_device * if the attaching device is an amdgpu device or
+ * partition, NULL otherwise.
+ */
+static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach)
+{
+ if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) {
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ return amdgpu_ttm_adev(bo->tbo.bdev);
+ }
+
+ return NULL;
+}
+
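For illustration, here is a minimal sketch (not part of this patch) of how a dynamic importer's ops pointer ends up on the attachment; that pointer is exactly what dma_buf_attach_adev() compares against amdgpu_dma_buf_attach_ops. The my_importer_* names are hypothetical.

#include <linux/dma-buf.h>

/* Illustrative only: a dynamic importer registers its attach ops here. */
static void my_importer_move_notify(struct dma_buf_attachment *attach)
{
	/* tear down any cached mappings of the imported buffer */
}

static const struct dma_buf_attach_ops my_importer_attach_ops = {
	.allow_peer2peer = true,
	.move_notify = my_importer_move_notify,
};

static struct dma_buf_attachment *
my_importer_attach(struct dma_buf *dmabuf, struct device *dev, void *priv)
{
	/*
	 * importer_ops/importer_priv become attach->importer_ops and
	 * attach->importer_priv, which dma_buf_attach_adev() inspects above.
	 */
	return dma_buf_dynamic_attach(dmabuf, dev, &my_importer_attach_ops, priv);
}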
/**
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
*
@@ -53,13 +77,48 @@
static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct dma_buf_attachment *attach)
{
+ struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach);
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ int r;
+
+ /*
+ * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
+ * Such buffers cannot be safely accessed over P2P due to device-local
+ * compression metadata, so fall back to the system-memory path instead.
+ * This applies when both conditions hold:
+ * - the device supports GFX12 (GC 12.x or newer)
+ * - the BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
+ bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+ attach->peer2peer = false;
- if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
+ if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) &&
+ pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
+
+ r = dma_resv_lock(bo->tbo.base.resv, NULL);
+ if (r)
+ return r;
+
+ amdgpu_vm_bo_update_shared(bo);
+
+ dma_resv_unlock(bo->tbo.base.resv);
+
return 0;
}
@@ -72,11 +131,35 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
*/
static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = attach->dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct dma_buf *dmabuf = attach->dmabuf;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv);
+ u32 domains = bo->allowed_domains;
+
+ dma_resv_assert_held(dmabuf->resv);
+
+ /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP
+ * support if all attachments can do P2P. If any attachment can't do
+ * P2P just pin into GTT instead.
+ *
+ * To avoid conflicting pinnings between GPUs and RDMA when move
+ * notifiers are disabled, only allow pinning in VRAM when move
+ * notifiers are enabled.
+ */
+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ } else {
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ if (!attach->peer2peer)
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ }
+
+ if (domains & AMDGPU_GEM_DOMAIN_VRAM)
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- /* pin buffer into GTT */
- return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (WARN_ON(!domains))
+ return -EINVAL;
+
+ return amdgpu_bo_pin(bo, domains);
}
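For context, a minimal sketch (not part of this patch) of the importer-side sequence that reaches amdgpu_dma_buf_pin(): both dma_buf_pin() and dma_buf_map_attachment() must run under the buffer's reservation lock, which is what the dma_resv_assert_held() above relies on. The my_pin_and_map name is hypothetical.

#include <linux/dma-buf.h>
#include <linux/dma-resv.h>

/* Illustrative importer-side pin-and-map under the reservation lock. */
static struct sg_table *my_pin_and_map(struct dma_buf_attachment *attach)
{
	struct sg_table *sgt;
	int r;

	r = dma_resv_lock(attach->dmabuf->resv, NULL);
	if (r)
		return ERR_PTR(r);

	r = dma_buf_pin(attach);
	if (r) {
		sgt = ERR_PTR(r);
		goto out_unlock;
	}

	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt))
		dma_buf_unpin(attach);

out_unlock:
	dma_resv_unlock(attach->dmabuf->resv);
	return sgt;
}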
/**
@@ -131,9 +214,6 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return ERR_PTR(r);
-
- } else if (bo->tbo.resource->mem_type != TTM_PL_TT) {
- return ERR_PTR(-EBUSY);
}
switch (bo->tbo.resource->mem_type) {
@@ -150,6 +230,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
break;
case TTM_PL_VRAM:
+ /* XGMI-accessible memory should never be DMA-mapped */
+ if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible(
+ dma_buf_attach_adev(attach), bo)))
+ return ERR_PTR(-EINVAL);
+
r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
bo->tbo.base.size, attach->dev,
dir, &sgt);
@@ -181,7 +266,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- if (sgt->sgl->page_link) {
+ if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
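The sg_page() test above distinguishes the two unmap paths: GTT/system sg_tables are built from struct pages and were DMA-mapped, while VRAM sg_tables (freed on the other branch, not visible in this hunk) carry only PCI bus addresses. A rough sketch of how such a page-less table is built, with build_pageless_sgt being an illustrative name rather than an amdgpu function:

#include <linux/scatterlist.h>
#include <linux/slab.h>

/* Build a single-entry sg_table that has a DMA address but no backing page,
 * so sg_page(sgt->sgl) stays NULL for it. */
static struct sg_table *build_pageless_sgt(dma_addr_t bus_addr, unsigned int size)
{
	struct sg_table *sgt;
	int ret;

	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
	if (!sgt)
		return ERR_PTR(-ENOMEM);

	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
	if (ret) {
		kfree(sgt);
		return ERR_PTR(ret);
	}

	/* DMA address only; no struct page is attached to the entry */
	sg_set_page(sgt->sgl, NULL, size, 0);
	sg_dma_address(sgt->sgl) = bus_addr;
	sg_dma_len(sgt->sgl) = size;
	return sgt;
}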
@@ -231,6 +316,36 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ int ret;
+
+ /*
+ * Pin to keep buffer in place while it's vmap'ed. The actual
+ * domain is not that important as long as it's mappable. Using
+ * GTT and VRAM should be compatible with most use cases.
+ */
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM);
+ if (ret)
+ return ret;
+ ret = drm_gem_dmabuf_vmap(dma_buf, map);
+ if (ret)
+ amdgpu_bo_unpin(bo);
+
+ return ret;
+}
+
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ drm_gem_dmabuf_vunmap(dma_buf, map);
+ amdgpu_bo_unpin(bo);
+}
+
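A hedged importer-side sketch (not part of this patch) of what ends up in amdgpu_dma_buf_vmap() above: the vmap pins the BO so the kernel mapping cannot be invalidated by a move while in use. The peek_first_byte name is illustrative.

#include <linux/dma-buf.h>
#include <linux/iosys-map.h>

/* Map an exported buffer into the kernel and copy out its first byte. */
static int peek_first_byte(struct dma_buf *dmabuf, u8 *out)
{
	struct iosys_map map;
	int ret;

	ret = dma_buf_vmap_unlocked(dmabuf, &map);
	if (ret)
		return ret;

	/* VRAM-backed buffers may be I/O memory, so use the iosys helpers */
	iosys_map_memcpy_from(out, &map, 0, 1);

	dma_buf_vunmap_unlocked(dmabuf, &map);
	return 0;
}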
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
.pin = amdgpu_dma_buf_pin,
@@ -240,8 +355,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
- .vmap = drm_gem_dmabuf_vmap,
- .vunmap = drm_gem_dmabuf_vunmap,
+ .vmap = amdgpu_dma_buf_vmap,
+ .vunmap = amdgpu_dma_buf_vunmap,
};
/**
@@ -259,11 +374,23 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct dma_buf *buf;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ /* We opt to avoid OOM on system pages allocations */
+ .gfp_retry_mayfail = true,
+ .allow_res_evict = false,
+ };
+ int ret;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
+ ret = ttm_bo_setup_export(&bo->tbo, &ctx);
+ if (ret)
+ return ERR_PTR(ret);
+
buf = drm_gem_prime_export(gobj, flags);
if (!IS_ERR(buf))
buf->ops = &amdgpu_dmabuf_ops;
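For context (not part of this patch), the export path above is typically reached from userspace through the PRIME handle-to-FD ioctl; a minimal libdrm sketch, with export_bo being an illustrative name:

#include <stdint.h>
#include <xf86drm.h>

/* Export an existing GEM handle as a dma-buf fd; for amdgpu objects the
 * kernel side runs amdgpu_gem_prime_export(). Userptr BOs and
 * VM_ALWAYS_VALID BOs fail with -EPERM per the check above. */
static int export_bo(int drm_fd, uint32_t gem_handle, int *dmabuf_fd)
{
	return drmPrimeHandleToFD(drm_fd, gem_handle,
				  DRM_CLOEXEC | DRM_RDWR, dmabuf_fd);
}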
@@ -345,7 +472,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
/* FIXME: This should be after the "if", but needs a fix to make sure
* DMABuf imports are initialized in the right VM list.
*/
- amdgpu_vm_bo_invalidate(adev, bo, false);
+ amdgpu_vm_bo_invalidate(bo, false);
if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
return;
@@ -456,7 +583,10 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
struct drm_gem_object *obj = &bo->tbo.base;
struct drm_gem_object *gobj;
- if (obj->import_attach) {
+ if (!adev)
+ return false;
+
+ if (drm_gem_is_imported(obj)) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
if (dma_buf->ops != &amdgpu_dmabuf_ops)