summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
author André Almeida <andrealmeid@igalia.com> 2023-09-11 14:12:55 -0300
committer Alex Deucher <alexander.deucher@amd.com> 2023-09-11 17:22:23 -0400
commit ffde72107be547d71f8074afd4f1ef1252073f3b (patch)
tree d2f995b60514d11d78787f4345656098d37b7a15 /drivers
parent 887db1e49a73bc48554a4959eb9679d3d6d90e4e (diff)
drm/amdgpu: Create an option to disable soft recovery
Create a module option to disable soft recoveries on amdgpu, making every recovery go through the device reset path. This option makes it easier to force device resets for testing and debugging purposes. Signed-off-by: André Almeida <andrealmeid@igalia.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 6
3 files changed, 13 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index eaebd2884503..62bbfdd502af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1102,6 +1102,7 @@ struct amdgpu_device {
/* Debug */
bool debug_vm;
bool debug_largebar;
+ bool debug_disable_soft_recovery;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 745174d196d6..f735e91eef5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -124,6 +124,7 @@
enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_VM = BIT(0),
AMDGPU_DEBUG_LARGEBAR = BIT(1),
+ AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -945,6 +946,7 @@ MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics
* - 0x2: Enable simulating large-bar capability on non-large bar system. This
* limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB.
+ * - 0x4: Disable GPU soft recovery, always do a full reset
*/
MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
@@ -2064,6 +2066,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
adev->debug_largebar = true;
}
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
+ pr_info("debug: soft reset for GPU recovery disabled\n");
+ adev->debug_disable_soft_recovery = true;
+ }
}
static int amdgpu_pci_probe(struct pci_dev *pdev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 80d6e132e409..6a80d3ec887e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -434,8 +434,12 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
unsigned long flags;
+ ktime_t deadline;
- ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+ if (unlikely(ring->adev->debug_disable_soft_recovery))
+ return false;
+
+ deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;