summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c65
1 files changed, 50 insertions, 15 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 94bdb5fa6ebc..1e9454e6e4cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -96,6 +96,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
#define AMDGPU_MAX_RETRY_LIMIT 2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
+#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
+#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
+#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
static const struct drm_driver amdgpu_kms_driver;
@@ -781,12 +784,22 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
- pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
- else
+ if (unlikely(!adev->nbio.funcs)) {
+ pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
+ pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
+ } else {
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ }
+
+ if (reg_addr >> 32) {
+ if (unlikely(!adev->nbio.funcs))
+ pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
+ else
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ } else {
pcie_index_hi = 0;
+ }
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -1218,8 +1231,6 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
amdgpu_psp_wait_for_bootloader(adev);
ret = amdgpu_atomfirmware_asic_init(adev, true);
- /* TODO: check the return val and stop device initialization if boot fails */
- amdgpu_psp_query_boot_status(adev);
return ret;
} else {
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1442,6 +1453,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;
+ /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
+ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
+ DRM_WARN("System can't access extended configuration space,please check!!\n");
+
/* skip if the bios has already enabled large BAR */
if (adev->gmc.real_vram_size &&
(pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
@@ -4025,8 +4040,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* early on during init and before calling to RREG32.
*/
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
- if (!adev->reset_domain)
- return -ENOMEM;
+ if (!adev->reset_domain) {
+ r = -ENOMEM;
+ goto unmap_memory;
+ }
/* detect hw virtualization here */
amdgpu_detect_virtualization(adev);
@@ -4036,20 +4053,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
- return r;
+ goto unmap_memory;
}
+ amdgpu_device_set_mcbp(adev);
+
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
- return r;
-
- amdgpu_device_set_mcbp(adev);
+ goto unmap_memory;
/* Get rid of things like offb */
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
if (r)
- return r;
+ goto unmap_memory;
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
@@ -4059,7 +4076,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (adev->gmc.xgmi.supported) {
r = adev->gfxhub.funcs->get_xgmi_info(adev);
if (r)
- return r;
+ goto unmap_memory;
}
/* enable PCIE atomic ops */
@@ -4328,6 +4345,8 @@ release_ras_con:
failed:
amdgpu_vf_error_trans_all(adev);
+unmap_memory:
+ iounmap(adev->rmmio);
return r;
}
@@ -5306,6 +5325,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
if (need_full_reset) {
/* post card */
+ amdgpu_ras_set_fed(tmp_adev, false);
r = amdgpu_device_asic_init(tmp_adev);
if (r) {
dev_warn(tmp_adev->dev, "asic atom init failed!");
@@ -5686,6 +5706,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
amdgpu_ras_resume(adev);
} else {
@@ -6107,6 +6128,20 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
struct amdgpu_reset_context reset_context;
u32 memsize;
struct list_head device_list;
+ struct amdgpu_hive_info *hive;
+ int hive_ras_recovery = 0;
+ struct amdgpu_ras *ras;
+
+ /* PCI error slot reset should be skipped During RAS recovery */
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+ ras = amdgpu_ras_get_context(adev);
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
+ ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
+ return PCI_ERS_RESULT_RECOVERED;
DRM_INFO("PCI error: slot reset callback!!\n");