summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorPhilip Yang <Philip.Yang@amd.com>2022-03-15 14:38:51 -0400
committerAlex Deucher <alexander.deucher@amd.com>2022-03-15 14:40:49 -0400
commit436afdfa35dc8aaf43959593f6c433d0ad29abc3 (patch)
tree5862ca181935ca6e78cedab4669936222361fa26 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent72a98763b473890e6605604bfcaf71fc212b4720 (diff)
drm/amdgpu: Move reset domain init before calling RREG32
amdgpu_detect_virtualization reads register, amdgpu_device_rreg access adev->reset_domain->sem if kernel defined CONFIG_LOCKDEP, below is the random boot hang backtrace on Vega10. It may get random NULL pointer access backtrace if amdgpu_sriov_runtime is true too. Move amdgpu_reset_create_reset_domain before calling to RREG32. BUG: kernel NULL pointer dereference, address: #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI Workqueue: events work_for_cpu_fn RIP: 0010:down_read_trylock+0x13/0xf0 Call Trace: <TASK> amdgpu_device_skip_hw_access+0x38/0x80 [amdgpu] amdgpu_device_rreg+0x1b/0x170 [amdgpu] amdgpu_detect_virtualization+0x73/0x100 [amdgpu] amdgpu_device_init.cold.60+0xbe/0x16b1 [amdgpu] ? pci_bus_read_config_word+0x43/0x70 amdgpu_driver_load_kms+0x15/0x120 [amdgpu] amdgpu_pci_probe+0x1a1/0x3a0 [amdgpu] Fixes: d0fb18b535679a ("drm/amdgpu: Move reset sem into reset_domain") Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c18
1 files changed, 9 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1b313a95c7e5..3987ecb24ef4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3666,6 +3666,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
adev->enable_mes = true;
+ /*
+ * Reset domain needs to be present early, before XGMI hive discovered
+ * (if any) and intitialized to use reset sem and in_gpu reset flag
+ * early on during init and before calling to RREG32.
+ */
+ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
+ if (!adev->reset_domain)
+ return -ENOMEM;
+
/* detect hw virtualization here */
amdgpu_detect_virtualization(adev);
@@ -3675,15 +3684,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
- /*
- * Reset domain needs to be present early, before XGMI hive discovered
- * (if any) and intitialized to use reset sem and in_gpu reset flag
- * early on during init.
- */
- adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE ,"amdgpu-reset-dev");
- if (!adev->reset_domain)
- return -ENOMEM;
-
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)