diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2292 |
1 files changed, 1593 insertions, 699 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5dc24c971b41..78f8755996f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -25,6 +25,8 @@ * Alex Deucher * Jerome Glisse */ + +#include <linux/aperture.h> #include <linux/power_supply.h> #include <linux/kthread.h> #include <linux/module.h> @@ -35,10 +37,9 @@ #include <linux/pci-p2pdma.h> #include <linux/apple-gmux.h> -#include <drm/drm_aperture.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_client_event.h> #include <drm/drm_crtc_helper.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> #include <linux/device.h> @@ -74,6 +75,7 @@ #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" #include "amdgpu_virt.h" +#include "amdgpu_dev_coredump.h" #include <linux/suspend.h> #include <drm/task_barrier.h> @@ -83,6 +85,7 @@ #if IS_ENABLED(CONFIG_X86) #include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #endif MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); @@ -100,6 +103,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2) #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2) +#define AMDGPU_VBIOS_SKIP (1U << 0) +#define AMDGPU_VBIOS_OPTIONAL (1U << 1) + static const struct drm_driver amdgpu_kms_driver; const char *amdgpu_asic_name[] = { @@ -143,13 +149,70 @@ const char *amdgpu_asic_name[] = { "LAST", }; +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0) +/* + * Default init level where all blocks are expected to be initialized. This is + * the level of initialization expected by default and also after a full reset + * of the device. + */ +struct amdgpu_init_level amdgpu_init_default = { + .level = AMDGPU_INIT_LEVEL_DEFAULT, + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, +}; + +struct amdgpu_init_level amdgpu_init_recovery = { + .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY, + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, +}; + +/* + * Minimal blocks needed to be initialized before a XGMI hive can be reset. This + * is used for cases like reset on initialization where the entire hive needs to + * be reset before first use. + */ +struct amdgpu_init_level amdgpu_init_minimal_xgmi = { + .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI, + .hwini_ip_block_mask = + BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) | + BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) | + BIT(AMD_IP_BLOCK_TYPE_PSP) +}; + +static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev, + enum amd_ip_block_type block) +{ + return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0; +} + +void amdgpu_set_init_level(struct amdgpu_device *adev, + enum amdgpu_init_lvl_id lvl) +{ + switch (lvl) { + case AMDGPU_INIT_LEVEL_MINIMAL_XGMI: + adev->init_lvl = &amdgpu_init_minimal_xgmi; + break; + case AMDGPU_INIT_LEVEL_RESET_RECOVERY: + adev->init_lvl = &amdgpu_init_recovery; + break; + case AMDGPU_INIT_LEVEL_DEFAULT: + fallthrough; + default: + adev->init_lvl = &amdgpu_init_default; + break; + } +} + +static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data); + /** * DOC: pcie_replay_count * * The amdgpu driver provides a sysfs API for reporting the total number - * of PCIe replays (NAKs) + * of PCIe replays (NAKs). * The file pcie_replay_count is used for this and returns the total - * number of replays as a sum of the NAKs generated and NAKs received + * number of replays as a sum of the NAKs generated and NAKs received. */ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, @@ -165,8 +228,26 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); +static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) +{ + int ret = 0; + + if (!amdgpu_sriov_vf(adev)) + ret = sysfs_create_file(&adev->dev->kobj, + &dev_attr_pcie_replay_count.attr); + + return ret; +} + +static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev)) + sysfs_remove_file(&adev->dev->kobj, + &dev_attr_pcie_replay_count.attr); +} + static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -202,8 +283,8 @@ static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, return bytes_read; } -BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, - AMDGPU_SYS_REG_STATE_END); +static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, + AMDGPU_SYS_REG_STATE_END); int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev) { @@ -224,6 +305,42 @@ void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev) sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state); } +int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block) +{ + int r; + + if (ip_block->version->funcs->suspend) { + r = ip_block->version->funcs->suspend(ip_block); + if (r) { + dev_err(ip_block->adev->dev, + "suspend of IP block <%s> failed %d\n", + ip_block->version->funcs->name, r); + return r; + } + } + + ip_block->status.hw = false; + return 0; +} + +int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block) +{ + int r; + + if (ip_block->version->funcs->resume) { + r = ip_block->version->funcs->resume(ip_block); + if (r) { + dev_err(ip_block->adev->dev, + "resume of IP block <%s> failed %d\n", + ip_block->version->funcs->name, r); + return r; + } + } + + ip_block->status.hw = true; + return 0; +} + /** * DOC: board_info * @@ -324,6 +441,9 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); + if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + return false; + if (adev->has_pr3 || ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid())) return true; @@ -335,16 +455,94 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) * * @dev: drm_device pointer * - * Returns true if the device supporte BACO, - * otherwise return false. + * Return: + * 1 if the device supports BACO; + * 3 if the device supports MACO (only works if BACO is supported) + * otherwise return 0. */ -bool amdgpu_device_supports_baco(struct drm_device *dev) +int amdgpu_device_supports_baco(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); return amdgpu_asic_supports_baco(adev); } +void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) +{ + struct drm_device *dev; + int bamaco_support; + + dev = adev_to_drm(adev); + + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; + bamaco_support = amdgpu_device_supports_baco(dev); + + switch (amdgpu_runtime_pm) { + case 2: + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Forcing BAMACO for runtime pm\n"); + } else if (bamaco_support == BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n"); + } + break; + case 1: + if (bamaco_support & BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Forcing BACO for runtime pm\n"); + } + break; + case -1: + case -2: + if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; + dev_info(adev->dev, "Using ATPX for runtime pm\n"); + } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; + dev_info(adev->dev, "Using BOCO for runtime pm\n"); + } else { + if (!bamaco_support) + goto no_runtime_pm; + + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + /* BACO are not supported on vega20 and arctrus */ + break; + case CHIP_VEGA10: + /* enable BACO as runpm mode if noretry=0 */ + if (!adev->gmc.noretry && !amdgpu_passthrough(adev)) + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + default: + /* enable BACO as runpm mode on CI+ */ + if (!amdgpu_passthrough(adev)) + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + } + + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Using BAMACO for runtime pm\n"); + } else { + dev_info(adev->dev, "Using BACO for runtime pm\n"); + } + } + } + break; + case 0: + dev_info(adev->dev, "runtime pm is manually disabled\n"); + break; + default: + break; + } + +no_runtime_pm: + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) + dev_info(adev->dev, "Runtime PM not available\n"); +} /** * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support @@ -407,7 +605,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, } /** - * amdgpu_device_aper_access - access vram by vram aperature + * amdgpu_device_aper_access - access vram by vram aperture * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -498,7 +696,7 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) * here is that the GPU reset is not running on another thread in parallel. * * For this we trylock the read side of the reset semaphore, if that succeeds - * we know that the reset is not running in paralell. + * we know that the reset is not running in parallel. * * If the trylock fails we assert that we are either already holding the read * side of the lock or are the reset thread itself and hold the write side of @@ -599,7 +797,7 @@ uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, GC_HWIP, false, &rlcg_flag)) { - ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id); + ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id)); } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { @@ -730,7 +928,7 @@ void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, GC_HWIP, true, &rlcg_flag)) { - amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id); + amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id)); } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { @@ -1214,6 +1412,17 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, BUG(); } +static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev) +{ + if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) + return AMDGPU_VBIOS_SKIP; + + if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev)) + return AMDGPU_VBIOS_OPTIONAL; + + return 0; +} + /** * amdgpu_device_asic_init - Wrapper for atom asic_init * @@ -1223,16 +1432,28 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, */ static int amdgpu_device_asic_init(struct amdgpu_device *adev) { + uint32_t flags; + bool optional; int ret; amdgpu_asic_pre_asic_init(adev); + flags = amdgpu_device_get_vbios_flags(adev); + optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP)); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { amdgpu_psp_wait_for_bootloader(adev); + if (optional && !adev->bios) + return 0; + ret = amdgpu_atomfirmware_asic_init(adev, true); return ret; } else { + if (optional && !adev->bios) + return 0; + return amdgpu_atom_asic_init(adev->mode_info.atom_context); } @@ -1402,13 +1623,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) */ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) { - unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); + unsigned long flags, offset; + spin_lock_irqsave(&adev->wb.lock, flags); + offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); if (offset < adev->wb.num_wb) { __set_bit(offset, adev->wb.used); + spin_unlock_irqrestore(&adev->wb.lock, flags); *wb = offset << 3; /* convert to dw offset */ return 0; } else { + spin_unlock_irqrestore(&adev->wb.lock, flags); return -EINVAL; } } @@ -1423,9 +1648,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) */ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { + unsigned long flags; + wb >>= 3; + spin_lock_irqsave(&adev->wb.lock, flags); if (wb < adev->wb.num_wb) __clear_bit(wb, adev->wb.used); + spin_unlock_irqrestore(&adev->wb.lock, flags); } /** @@ -1453,9 +1682,19 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + if (!amdgpu_rebar) + return 0; + + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ + if ((amdgpu_runtime_pm != 0) && + adev->pdev->vendor == PCI_VENDOR_ID_ATI && + adev->pdev->device == 0x731f && + adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) + return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) - DRM_WARN("System can't access extended configuration space,please check!!\n"); + DRM_WARN("System can't access extended configuration space, please check!!\n"); /* skip if the bios has already enabled large BAR */ if (adev->gmc.real_vram_size && @@ -1513,14 +1752,6 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) return 0; } -static bool amdgpu_device_read_bios(struct amdgpu_device *adev) -{ - if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) - return false; - - return true; -} - /* * GPU helpers function. */ @@ -1535,12 +1766,15 @@ static bool amdgpu_device_read_bios(struct amdgpu_device *adev) */ bool amdgpu_device_need_post(struct amdgpu_device *adev) { - uint32_t reg; + uint32_t reg, flags; if (amdgpu_sriov_vf(adev)) return false; - if (!amdgpu_device_read_bios(adev)) + flags = amdgpu_device_get_vbios_flags(adev); + if (flags & AMDGPU_VBIOS_SKIP) + return false; + if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios) return false; if (amdgpu_passthrough(adev)) { @@ -1554,7 +1788,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) uint32_t fw_ver; err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); - /* force vPost if error occured */ + /* force vPost if error occurred */ if (err) return true; @@ -1566,7 +1800,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) } /* Don't post if we need to reset whole hive on init */ - if (adev->gmc.xgmi.pending_reset) + if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) return false; if (adev->has_hw_reset) { @@ -1641,6 +1875,35 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device return true; } +static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) + return false; + + if (c->x86 == 6 && + adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) { + switch (c->x86_model) { + case VFM_MODEL(INTEL_ALDERLAKE): + case VFM_MODEL(INTEL_ALDERLAKE_L): + case VFM_MODEL(INTEL_RAPTORLAKE): + case VFM_MODEL(INTEL_RAPTORLAKE_P): + case VFM_MODEL(INTEL_RAPTORLAKE_S): + return true; + default: + return false; + } + } else { + return false; + } +#else + return false; +#endif +} + /** * amdgpu_device_should_use_aspm - check if the device should program ASPM * @@ -1665,7 +1928,7 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) } if (adev->flags & AMD_IS_APU) return false; - if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) + if (amdgpu_device_aspm_support_quirk(adev)) return false; return pcie_aspm_enabled(adev->pdev); } @@ -1827,6 +2090,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int i; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -1881,6 +2146,32 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + for (i = 0; i < MAX_XCP; i++) { + switch (amdgpu_enforce_isolation) { + case -1: + case 0: + default: + /* disable */ + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + /* enable */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + /* enable legacy mode */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + case 3: + /* enable only process isolation without submitting cleaner shader */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER; + break; + } + } + return 0; } @@ -1981,7 +2272,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev, if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) continue; r = adev->ip_blocks[i].version->funcs->set_clockgating_state( - (void *)adev, state); + &adev->ip_blocks[i], state); if (r) DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2015,7 +2306,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, if (!adev->ip_blocks[i].version->funcs->set_powergating_state) continue; r = adev->ip_blocks[i].version->funcs->set_powergating_state( - (void *)adev, state); + &adev->ip_blocks[i], state); if (r) DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2043,7 +2334,8 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, if (!adev->ip_blocks[i].status.valid) continue; if (adev->ip_blocks[i].version->funcs->get_clockgating_state) - adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags); + adev->ip_blocks[i].version->funcs->get_clockgating_state( + &adev->ip_blocks[i], flags); } } @@ -2065,9 +2357,12 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, if (!adev->ip_blocks[i].status.valid) continue; if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev); - if (r) - return r; + if (adev->ip_blocks[i].version->funcs->wait_for_idle) { + r = adev->ip_blocks[i].version->funcs->wait_for_idle( + &adev->ip_blocks[i]); + if (r) + return r; + } break; } } @@ -2076,26 +2371,24 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, } /** - * amdgpu_device_ip_is_idle - is the hardware IP idle + * amdgpu_device_ip_is_valid - is the hardware IP enabled * * @adev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * - * Check if the hardware IP is idle or not. - * Returns true if it the IP is idle, false if not. + * Check if the hardware IP is enable or not. + * Returns true if it the IP is enable, false if not. */ -bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) +bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) { int i; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; if (adev->ip_blocks[i].version->type == block_type) - return adev->ip_blocks[i].version->funcs->is_idle((void *)adev); + return adev->ip_blocks[i].status.valid; } - return true; + return false; } @@ -2174,8 +2467,10 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, break; } - DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, - ip_block_version->funcs->name); + dev_info(adev->dev, "detected ip block number %d <%s>\n", + adev->num_ip_blocks, ip_block_version->funcs->name); + + adev->ip_blocks[adev->num_ip_blocks].adev = adev; adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; @@ -2191,7 +2486,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, * the module parameter virtual_display. This feature provides a virtual * display hardware on headless boards or in virtualized environments. * This function parses and validates the configuration string specified by - * the user and configues the virtual display configuration (number of + * the user and configures the virtual display configuration (number of * virtual connectors, crtcs, etc.) specified. */ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) @@ -2254,14 +2549,13 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Parses the asic configuration parameters specified in the gpu info - * firmware and makes them availale to the driver for use in configuring + * firmware and makes them available to the driver for use in configuring * the asic. * Returns 0 on success, -EINVAL on failure. */ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[40]; int err; const struct gpu_info_firmware_header_v1_0 *hdr; @@ -2295,12 +2589,13 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) break; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, + AMDGPU_UCODE_OPTIONAL, + "amdgpu/%s_gpu_info.bin", chip_name); if (err) { dev_err(adev->dev, - "Failed to get gpu_info firmware \"%s\"\n", - fw_name); + "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n", + chip_name); goto out; } @@ -2315,7 +2610,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) le32_to_cpu(hdr->header.ucode_array_offset_bytes)); /* - * Should be droped when DAL no longer needs it. + * Should be dropped when DAL no longer needs it. */ if (adev->asic_type == CHIP_NAVI12) goto parse_soc_bounding_box; @@ -2383,9 +2678,11 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; struct pci_dev *parent; + bool total, skip_bios; + uint32_t bios_flags; int i, r; - bool total; amdgpu_device_enable_virtual_display(adev); @@ -2449,6 +2746,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; } + /* Check for IP version 9.4.3 with A0 hardware */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && + !amdgpu_device_get_rev_id(adev)) { + dev_err(adev->dev, "Unsupported A0 hardware\n"); + return -ENODEV; /* device unsupported - no device error */ + } + if (amdgpu_has_atpx() && (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && @@ -2461,7 +2765,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } - adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; @@ -2472,25 +2775,25 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) total = true; for (i = 0; i < adev->num_ip_blocks; i++) { + ip_block = &adev->ip_blocks[i]; + if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_WARN("disabled ip block: %d <%s>\n", i, adev->ip_blocks[i].version->funcs->name); adev->ip_blocks[i].status.valid = false; - } else { - if (adev->ip_blocks[i].version->funcs->early_init) { - r = adev->ip_blocks[i].version->funcs->early_init((void *)adev); - if (r == -ENOENT) { - adev->ip_blocks[i].status.valid = false; - } else if (r) { - DRM_ERROR("early_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - total = false; - } else { - adev->ip_blocks[i].status.valid = true; - } + } else if (ip_block->version->funcs->early_init) { + r = ip_block->version->funcs->early_init(ip_block); + if (r == -ENOENT) { + adev->ip_blocks[i].status.valid = false; + } else if (r) { + DRM_ERROR("early_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + total = false; } else { adev->ip_blocks[i].status.valid = true; } + } else { + adev->ip_blocks[i].status.valid = true; } /* get the vbios after the asic_funcs are set up */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { @@ -2498,16 +2801,31 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; + bios_flags = amdgpu_device_get_vbios_flags(adev); + skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP); /* Read BIOS */ - if (amdgpu_device_read_bios(adev)) { - if (!amdgpu_get_bios(adev)) + if (!skip_bios) { + bool optional = + !!(bios_flags & AMDGPU_VBIOS_OPTIONAL); + if (!amdgpu_get_bios(adev) && !optional) return -EINVAL; - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - return r; + if (optional && !adev->bios) + dev_info( + adev->dev, + "VBIOS image optional, proceeding without VBIOS image"); + + if (adev->bios) { + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, + "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put( + adev, + AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, + 0, 0); + return r; + } } } @@ -2520,7 +2838,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; - amdgpu_amdkfd_device_probe(adev); + if (adev->gmc.xgmi.supported) + amdgpu_xgmi_early_init(adev); + + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (ip_block->status.valid != false) + amdgpu_amdkfd_device_probe(adev); + adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; @@ -2536,10 +2860,13 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hw) continue; + if (!amdgpu_ip_member_of_hwini( + adev, adev->ip_blocks[i].version->type)) + continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2561,7 +2888,10 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hw) continue; - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + if (!amdgpu_ip_member_of_hwini( + adev, adev->ip_blocks[i].version->type)) + continue; + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2584,6 +2914,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) continue; + if (!amdgpu_ip_member_of_hwini(adev, + AMD_IP_BLOCK_TYPE_PSP)) + break; + if (!adev->ip_blocks[i].status.sw) continue; @@ -2592,22 +2926,18 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) break; if (amdgpu_in_reset(adev) || adev->in_suspend) { - r = adev->ip_blocks[i].version->funcs->resume(adev); - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); + if (r) return r; - } } else { - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } - - adev->ip_blocks[i].status.hw = true; break; } } @@ -2620,6 +2950,12 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) { + struct drm_sched_init_args args = { + .ops = &amdgpu_sched_ops, + .num_rqs = DRM_SCHED_PRIORITY_COUNT, + .timeout_wq = adev->reset_domain->wq, + .dev = adev->dev, + }; long timeout; int r, i; @@ -2645,12 +2981,12 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) break; } - r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL, - DRM_SCHED_PRIORITY_COUNT, - ring->num_hw_submission, 0, - timeout, adev->reset_domain->wq, - ring->sched_score, ring->name, - adev->dev); + args.timeout = timeout; + args.credit_limit = ring->num_hw_submission; + args.score = ring->sched_score; + args.name = ring->name; + + r = drm_sched_init(&ring->sched, &args); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", ring->name); @@ -2689,6 +3025,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) */ static int amdgpu_device_ip_init(struct amdgpu_device *adev) { + bool init_badpage; int i, r; r = amdgpu_ras_init(adev); @@ -2698,17 +3035,23 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); - if (r) { - DRM_ERROR("sw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - goto init_failed; + if (adev->ip_blocks[i].version->funcs->sw_init) { + r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]); + if (r) { + DRM_ERROR("sw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + goto init_failed; + } } adev->ip_blocks[i].status.sw = true; + if (!amdgpu_ip_member_of_hwini( + adev, adev->ip_blocks[i].version->type)) + continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { /* need to do common hw init early so everything is set up for gmc */ - r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init %d failed %d\n", i, r); goto init_failed; @@ -2725,7 +3068,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r); goto init_failed; } - r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init %d failed %d\n", i, r); goto init_failed; @@ -2798,7 +3141,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) * Note: theoretically, this should be called before all vram allocations * to protect retired page from abusing */ - r = amdgpu_ras_recovery_init(adev); + init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI); + r = amdgpu_ras_recovery_init(adev, init_badpage); if (r) goto init_failed; @@ -2838,13 +3182,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_ttm_set_buffer_funcs_status(adev, true); /* Don't init kfd if whole hive need to be reset during init */ - if (!adev->gmc.xgmi.pending_reset) { + if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { kgd2kfd_init_zone_device(adev); amdgpu_amdkfd_device_init(adev); } amdgpu_fru_get_product_info(adev); + if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev)) + r = amdgpu_cper_init(adev); + init_failed: return r; @@ -2857,7 +3204,7 @@ init_failed: * * Writes a reset magic value to the gart pointer in VRAM. The driver calls * this function before a GPU reset. If the value is retained after a - * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents. + * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. */ static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) { @@ -2888,6 +3235,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * always assumed to be lost. */ switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_LINK: case AMD_RESET_METHOD_BACO: case AMD_RESET_METHOD_MODE1: return true; @@ -2933,7 +3281,7 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, + r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i], state); if (r) { DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", @@ -2970,7 +3318,7 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_powergating_state) { /* enable powergating to save power */ - r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, + r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i], state); if (r) { DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", @@ -3038,7 +3386,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->funcs->late_init) { - r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); + r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("late_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -3054,7 +3402,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return r; } - amdgpu_ras_set_error_query_ready(adev, true); + if (!amdgpu_reset_in_recovery(adev)) + amdgpu_ras_set_error_query_ready(adev, true); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); @@ -3108,6 +3457,25 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return 0; } +static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block) +{ + int r; + + if (!ip_block->version->funcs->hw_fini) { + DRM_ERROR("hw_fini of IP block <%s> not defined\n", + ip_block->version->funcs->name); + } else { + r = ip_block->version->funcs->hw_fini(ip_block); + /* XXX handle errors */ + if (r) { + DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", + ip_block->version->funcs->name, r); + } + } + + ip_block->status.hw = false; +} + /** * amdgpu_device_smu_fini_early - smu hw_fini wrapper * @@ -3117,7 +3485,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) */ static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) { - int i, r; + int i; if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) return; @@ -3126,13 +3494,7 @@ static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { - r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); - /* XXX handle errors */ - if (r) { - DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } - adev->ip_blocks[i].status.hw = false; + amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]); break; } } @@ -3146,7 +3508,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) if (!adev->ip_blocks[i].version->funcs->early_fini) continue; - r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); + r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]); if (r) { DRM_DEBUG("early_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -3157,22 +3519,16 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); amdgpu_amdkfd_suspend(adev, false); + amdgpu_userq_suspend(adev); - /* Workaroud for ASICs need to disable SMC first */ + /* Workaround for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.hw) continue; - r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); - /* XXX handle errors */ - if (r) { - DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } - - adev->ip_blocks[i].status.hw = false; + amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]); } if (amdgpu_sriov_vf(adev)) { @@ -3198,6 +3554,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + amdgpu_cper_fini(adev); + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) amdgpu_virt_release_ras_err_handler_data(adev); @@ -3217,13 +3575,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); amdgpu_seq64_fini(adev); + amdgpu_doorbell_fini(adev); } - - r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); - /* XXX handle errors */ - if (r) { - DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + if (adev->ip_blocks[i].version->funcs->sw_fini) { + r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); + /* XXX handle errors */ + if (r) { + DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + } } adev->ip_blocks[i].status.sw = false; adev->ip_blocks[i].status.valid = false; @@ -3233,7 +3593,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.late_initialized) continue; if (adev->ip_blocks[i].version->funcs->late_fini) - adev->ip_blocks[i].version->funcs->late_fini((void *)adev); + adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]); adev->ip_blocks[i].status.late_initialized = false; } @@ -3266,7 +3626,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) WARN_ON_ONCE(adev->gfx.gfx_off_state); WARN_ON_ONCE(adev->gfx.gfx_off_req_count); - if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0)) adev->gfx.gfx_off_state = true; } @@ -3305,15 +3665,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) continue; /* XXX handle errors */ - r = adev->ip_blocks[i].version->funcs->suspend(adev); - /* XXX handle errors */ - if (r) { - DRM_ERROR("suspend of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); + if (r) return r; - } - - adev->ip_blocks[i].status.hw = false; } return 0; @@ -3351,15 +3705,17 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) } /* skip unnecessary suspend if we do not initialize them yet */ - if (adev->gmc.xgmi.pending_reset && - !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) { - adev->ip_blocks[i].status.hw = false; + if (!amdgpu_ip_member_of_hwini( + adev, adev->ip_blocks[i].version->type)) continue; - } + /* Since we skip suspend for S0i3, we need to cancel the delayed + * idle work here as the suspend callback never gets called. + */ + if (adev->in_s0ix && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) + cancel_delayed_work_sync(&adev->gfx.idle_work); /* skip suspend of gfx/mes and psp for S0ix * gfx is in gfxoff state, so on resume it will exit gfxoff just * like at runtime. PSP is also part of the always on hardware @@ -3392,13 +3748,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) continue; /* XXX handle errors */ - r = adev->ip_blocks[i].version->funcs->suspend(adev); - /* XXX handle errors */ - if (r) { - DRM_ERROR("suspend of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } + r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); adev->ip_blocks[i].status.hw = false; + /* handle putting the SMC in the appropriate state */ if (!amdgpu_sriov_vf(adev)) { if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { @@ -3472,10 +3824,12 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) !block->status.valid) continue; - r = block->version->funcs->hw_init(adev); - DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); - if (r) + r = block->version->funcs->hw_init(&adev->ip_blocks[i]); + if (r) { + dev_err(adev->dev, "RE-INIT-early: %s failed\n", + block->version->funcs->name); return r; + } block->status.hw = true; } } @@ -3485,7 +3839,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) { - int i, r; + struct amdgpu_ip_block *block; + int i, r = 0; static enum amd_ip_block_type ip_order[] = { AMD_IP_BLOCK_TYPE_SMC, @@ -3500,30 +3855,28 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) }; for (i = 0; i < ARRAY_SIZE(ip_order); i++) { - int j; - struct amdgpu_ip_block *block; - - for (j = 0; j < adev->num_ip_blocks; j++) { - block = &adev->ip_blocks[j]; + block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]); - if (block->version->type != ip_order[i] || - !block->status.valid || - block->status.hw) - continue; + if (!block) + continue; - if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) - r = block->version->funcs->resume(adev); - else - r = block->version->funcs->hw_init(adev); + if (block->status.valid && !block->status.hw) { + if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) { + r = amdgpu_ip_block_resume(block); + } else { + r = block->version->funcs->hw_init(block); + } - DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); - if (r) - return r; + if (r) { + dev_err(adev->dev, "RE-INIT-late: %s failed\n", + block->version->funcs->name); + break; + } block->status.hw = true; } } - return 0; + return r; } /** @@ -3550,13 +3903,9 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { - r = adev->ip_blocks[i].version->funcs->resume(adev); - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); + if (r) return r; - } - adev->ip_blocks[i].status.hw = true; } } @@ -3568,7 +3917,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * First resume function for hardware IPs. The list of all the hardware + * Second resume function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the resume callbacks are run for * all blocks except COMMON, GMC, and IH. resume puts the hardware into a * functional state after a suspend and updates the software state as @@ -3586,15 +3935,42 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) continue; - r = adev->ip_blocks[i].version->funcs->resume(adev); - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); + if (r) return r; + } + + return 0; +} + +/** + * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs + * + * @adev: amdgpu_device pointer + * + * Third resume function for hardware IPs. The list of all the hardware + * IPs that make up the asic is walked and the resume callbacks are run for + * all DCE. resume puts the hardware into a functional state after a suspend + * and updates the software state as necessary. This function is also used + * for restoring the GPU after a GPU reset. + * + * Returns 0 on success, negative error code on failure. + */ +static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) + continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { + r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); + if (r) + return r; } - adev->ip_blocks[i].status.hw = true; } return 0; @@ -3629,6 +4005,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring->sched.ready) amdgpu_ttm_set_buffer_funcs_status(adev, true); + if (r) + return r; + + amdgpu_fence_driver_hw_init(adev); + + r = amdgpu_device_ip_resume_phase3(adev); + return r; } @@ -3859,10 +4242,26 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) adev->ram_is_direct_mapped = true; } -static const struct attribute *amdgpu_dev_attributes[] = { - &dev_attr_pcie_replay_count.attr, - NULL -}; +#if defined(CONFIG_HSA_AMD_P2P) +/** + * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled. + * + * @adev: amdgpu_device pointer + * + * return if IOMMU remapping bar address + */ +static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(adev->dev); + if (domain && (domain->type == IOMMU_DOMAIN_DMA || + domain->type == IOMMU_DOMAIN_DMA_FQ)) + return true; + + return false; +} +#endif static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) { @@ -3965,6 +4364,18 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->notifier_lock); mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); + mutex_init(&adev->gfx.reset_sem_mutex); + /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ + mutex_init(&adev->enforce_isolation_mutex); + for (i = 0; i < MAX_XCP; ++i) { + adev->isolation[i].spearhead = dma_fence_get_stub(); + amdgpu_sync_create(&adev->isolation[i].active); + amdgpu_sync_create(&adev->isolation[i].prev); + } + mutex_init(&adev->gfx.userq_sch_mutex); + mutex_init(&adev->gfx.workload_profile_mutex); + mutex_init(&adev->vcn.workload_profile_mutex); + mutex_init(&adev->userq_mutex); amdgpu_device_init_apu_flags(adev); @@ -3981,9 +4392,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->se_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); + spin_lock_init(&adev->virt.rlcg_reg_lock); + spin_lock_init(&adev->wb.lock); - INIT_LIST_HEAD(&adev->shadow_list); - mutex_init(&adev->shadow_list_lock); + xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ); INIT_LIST_HEAD(&adev->reset_list); @@ -3991,10 +4403,27 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->pm.od_kobj_list); + INIT_LIST_HEAD(&adev->userq_mgr_list); + INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + /* + * Initialize the enforce_isolation work structures for each XCP + * partition. This work handler is responsible for enforcing shader + * isolation on AMD GPUs. It counts the number of emitted fences for + * each GFX and compute ring. If there are any fences, it schedules + * the `enforce_isolation_work` to be run after a delay. If there are + * no fences, it signals the Kernel Fusion Driver (KFD) to resume the + * runqueue. + */ + for (i = 0; i < MAX_XCP; i++) { + INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work, + amdgpu_gfx_enforce_isolation_handler); + adev->gfx.enforce_isolation[i].adev = adev; + adev->gfx.enforce_isolation[i].xcp_id = i; + } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); @@ -4012,6 +4441,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, * for throttling interrupt) = 60 seconds. */ ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); + ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); /* Registers mapping */ @@ -4036,7 +4466,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* * Reset domain needs to be present early, before XGMI hive discovered - * (if any) and intitialized to use reset sem and in_gpu reset flag + * (if any) and initialized to use reset sem and in_gpu reset flag * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); @@ -4044,7 +4474,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, return -ENOMEM; /* detect hw virtualization here */ - amdgpu_detect_virtualization(adev); + amdgpu_virt_init(adev); amdgpu_device_get_pcie_info(adev); @@ -4056,19 +4486,39 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_set_mcbp(adev); + /* + * By default, use default mode where all blocks are expected to be + * initialized. At present a 'swinit' of blocks is required to be + * completed before the need for a different level is detected. + */ + amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT); /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) return r; - /* Get rid of things like offb */ - r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); - if (r) - return r; + /* + * No need to remove conflicting FBs for non-display class devices. + * This prevents the sysfb from being freed accidently. + */ + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { + /* Get rid of things like offb */ + r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name); + if (r) + return r; + } /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); + if (amdgpu_sriov_vf(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) + /* VF MMIO access (except mailbox range) from CPU + * will be blocked during sriov runtime + */ + adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { @@ -4121,32 +4571,24 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { if (adev->gmc.xgmi.num_physical_nodes) { dev_info(adev->dev, "Pending hive reset.\n"); - adev->gmc.xgmi.pending_reset = true; - /* Only need to init necessary block for SMU to handle the reset */ - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) { - DRM_DEBUG("IP %s disabled for hw_init.\n", - adev->ip_blocks[i].version->funcs->name); - adev->ip_blocks[i].status.hw = true; - } - } + amdgpu_set_init_level(adev, + AMDGPU_INIT_LEVEL_MINIMAL_XGMI); + } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + !amdgpu_device_has_display_hardware(adev)) { + r = psp_gpu_reset(adev); } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - if (r) { - dev_err(adev->dev, "asic reset on init failed\n"); - goto failed; - } + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + } + + if (r) { + dev_err(adev->dev, "asic reset on init failed\n"); + goto failed; } } @@ -4183,8 +4625,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto failed; } /* init i2c buses */ - if (!amdgpu_device_has_dc_support(adev)) - amdgpu_atombios_i2c_init(adev); + amdgpu_i2c_init(adev); } } @@ -4238,7 +4679,7 @@ fence_driver_init: /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ - if (!adev->gmc.xgmi.pending_reset) { + if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { r = amdgpu_device_ip_late_init(adev); if (r) { dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); @@ -4277,7 +4718,7 @@ fence_driver_init: } else adev->ucode_sysfs_en = true; - r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); + r = amdgpu_device_attr_sysfs_init(adev); if (r) dev_err(adev->dev, "Could not create amdgpu device attr\n"); @@ -4288,6 +4729,7 @@ fence_driver_init: amdgpu_fru_sysfs_init(adev); amdgpu_reg_state_sysfs_init(adev); + amdgpu_xcp_sysfs_init(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) r = amdgpu_pmu_init(adev); @@ -4315,12 +4757,16 @@ fence_driver_init: if (px) vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); - if (adev->gmc.xgmi.pending_reset) - queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); + if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) + amdgpu_xgmi_reset_on_init(adev); amdgpu_device_check_iommu_direct_map(adev); + adev->pm_nb.notifier_call = amdgpu_device_pm_notifier; + r = register_pm_notifier(&adev->pm_nb); + if (r) + goto failed; + return 0; release_ras_con: @@ -4380,8 +4826,13 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) { dev_info(adev->dev, "amdgpu: finishing device.\n"); flush_delayed_work(&adev->delayed_init_work); + + if (adev->mman.initialized) + drain_workqueue(adev->mman.bdev.wq); adev->shutdown = true; + unregister_pm_notifier(&adev->pm_nb); + /* make sure IB test finished before entering exclusive mode * to avoid preemption on IB test */ @@ -4400,17 +4851,15 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) } amdgpu_fence_driver_hw_fini(adev); - if (adev->mman.initialized) - drain_workqueue(adev->mman.bdev.wq); - if (adev->pm.sysfs_initialized) amdgpu_pm_sysfs_fini(adev); if (adev->ucode_sysfs_en) amdgpu_ucode_sysfs_fini(adev); - sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); + amdgpu_device_attr_sysfs_fini(adev); amdgpu_fru_sysfs_fini(adev); amdgpu_reg_state_sysfs_fini(adev); + amdgpu_xcp_sysfs_fini(adev); /* disable ras feature must before hw fini */ amdgpu_ras_pre_fini(adev); @@ -4433,30 +4882,37 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) void amdgpu_device_fini_sw(struct amdgpu_device *adev) { - int idx; + int i, idx; bool px; - amdgpu_fence_driver_sw_fini(adev); amdgpu_device_ip_fini(adev); + amdgpu_fence_driver_sw_fini(adev); amdgpu_ucode_release(&adev->firmware.gpu_info_fw); adev->accel_working = false; dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); + for (i = 0; i < MAX_XCP; ++i) { + dma_fence_put(adev->isolation[i].spearhead); + amdgpu_sync_free(&adev->isolation[i].active); + amdgpu_sync_free(&adev->isolation[i].prev); + } amdgpu_reset_fini(adev); /* free i2c buses */ - if (!amdgpu_device_has_dc_support(adev)) - amdgpu_i2c_fini(adev); - - if (amdgpu_emu_mode != 1) - amdgpu_atombios_fini(adev); + amdgpu_i2c_fini(adev); - kfree(adev->bios); - adev->bios = NULL; + if (adev->bios) { + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); + amdgpu_bios_release(adev); + } kfree(adev->fru_info); adev->fru_info = NULL; + kfree(adev->xcp_mgr); + adev->xcp_mgr = NULL; + px = amdgpu_device_supports_px(adev_to_drm(adev)); if (px || (!dev_is_removable(&adev->pdev->dev) && @@ -4473,7 +4929,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_doorbell_fini(adev); drm_dev_exit(idx); } @@ -4502,8 +4957,8 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) { int ret; - /* No need to evict vram on APUs for suspend to ram or s2idle */ - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) + /* No need to evict vram on APUs unless going to S4 */ + if (!adev->in_s4 && (adev->flags & AMD_IS_APU)) return 0; ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); @@ -4516,6 +4971,33 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * Suspend & resume. */ /** + * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events + * @nb: notifier block + * @mode: suspend mode + * @data: data + * + * This function is called when the system is about to suspend or hibernate. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. + */ +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data) +{ + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); + + switch (mode) { + case PM_HIBERNATION_PREPARE: + adev->in_s4 = true; + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; + break; + } + + return NOTIFY_DONE; +} + +/** * amdgpu_device_prepare - prepare for device suspend * * @dev: drm dev pointer @@ -4529,45 +5011,40 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; + + flush_delayed_work(&adev->gfx.gfx_off_delay_work); for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; if (!adev->ip_blocks[i].version->funcs->prepare_suspend) continue; - r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); + r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) - goto unprepare; + return r; } return 0; - -unprepare: - adev->in_s0ix = adev->in_s3 = false; - - return r; } /** * amdgpu_device_suspend - initiate device suspend * * @dev: drm dev pointer - * @fbcon : notify the fbdev of suspend + * @notify_clients: notify in-kernel DRM clients * * Puts the hw in the suspend state (all asics). * Returns 0 for success or an error on failure. * Called at driver suspend. */ -int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) +int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) { struct amdgpu_device *adev = drm_to_adev(dev); int r = 0; @@ -4587,8 +5064,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) DRM_WARN("smart shift update failed\n"); - if (fbcon) - drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); + if (notify_clients) + drm_client_dev_suspend(adev_to_drm(adev), false); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -4596,8 +5073,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) + if (!adev->in_s0ix) { amdgpu_amdkfd_suspend(adev, adev->in_runpm); + amdgpu_userq_suspend(adev); + } r = amdgpu_device_evict_resources(adev); if (r) @@ -4623,13 +5102,13 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) * amdgpu_device_resume - initiate device resume * * @dev: drm dev pointer - * @fbcon : notify the fbdev of resume + * @notify_clients: notify in-kernel DRM clients * * Bring the hw back to operating state (all asics). * Returns 0 for success or an error on failure. * Called at driver resume. */ -int amdgpu_device_resume(struct drm_device *dev, bool fbcon) +int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) { struct amdgpu_device *adev = drm_to_adev(dev); int r = 0; @@ -4659,12 +5138,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); goto exit; } - amdgpu_fence_driver_hw_init(adev); if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; + + r = amdgpu_userq_resume(adev); + if (r) + goto exit; } r = amdgpu_device_ip_late_init(adev); @@ -4685,8 +5167,8 @@ exit: /* Make sure IB tests flushed */ flush_delayed_work(&adev->delayed_init_work); - if (fbcon) - drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); + if (notify_clients) + drm_client_dev_resume(adev_to_drm(adev), false); amdgpu_ras_resume(adev); @@ -4713,9 +5195,6 @@ exit: } adev->in_suspend = false; - if (adev->enable_mes) - amdgpu_mes_self_test(adev); - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) DRM_WARN("smart shift update failed\n"); @@ -4748,7 +5227,8 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->funcs->check_soft_reset) adev->ip_blocks[i].status.hang = - adev->ip_blocks[i].version->funcs->check_soft_reset(adev); + adev->ip_blocks[i].version->funcs->check_soft_reset( + &adev->ip_blocks[i]); if (adev->ip_blocks[i].status.hang) { dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); asic_hang = true; @@ -4777,7 +5257,7 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hang && adev->ip_blocks[i].version->funcs->pre_soft_reset) { - r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); + r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]); if (r) return r; } @@ -4839,7 +5319,7 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hang && adev->ip_blocks[i].version->funcs->soft_reset) { - r = adev->ip_blocks[i].version->funcs->soft_reset(adev); + r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]); if (r) return r; } @@ -4868,7 +5348,7 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hang && adev->ip_blocks[i].version->funcs->post_soft_reset) - r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); + r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]); if (r) return r; } @@ -4877,103 +5357,33 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) } /** - * amdgpu_device_recover_vram - Recover some VRAM contents - * - * @adev: amdgpu_device pointer - * - * Restores the contents of VRAM buffers from the shadows in GTT. Used to - * restore things like GPUVM page tables after a GPU reset where - * the contents of VRAM might be lost. - * - * Returns: - * 0 on success, negative error code on failure. - */ -static int amdgpu_device_recover_vram(struct amdgpu_device *adev) -{ - struct dma_fence *fence = NULL, *next = NULL; - struct amdgpu_bo *shadow; - struct amdgpu_bo_vm *vmbo; - long r = 1, tmo; - - if (amdgpu_sriov_runtime(adev)) - tmo = msecs_to_jiffies(8000); - else - tmo = msecs_to_jiffies(100); - - dev_info(adev->dev, "recover vram bo from shadow start\n"); - mutex_lock(&adev->shadow_list_lock); - list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { - /* If vm is compute context or adev is APU, shadow will be NULL */ - if (!vmbo->shadow) - continue; - shadow = vmbo->shadow; - - /* No need to recover an evicted BO */ - if (shadow->tbo.resource->mem_type != TTM_PL_TT || - shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || - shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) - continue; - - r = amdgpu_bo_restore_shadow(shadow, &next); - if (r) - break; - - if (fence) { - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - fence = next; - if (tmo == 0) { - r = -ETIMEDOUT; - break; - } else if (tmo < 0) { - r = tmo; - break; - } - } else { - fence = next; - } - } - mutex_unlock(&adev->shadow_list_lock); - - if (fence) - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - - if (r < 0 || tmo <= 0) { - dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); - return -EIO; - } - - dev_info(adev->dev, "recover vram bo from shadow done\n"); - return 0; -} - - -/** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu_device pointer - * @from_hypervisor: request from hypervisor + * @reset_context: amdgpu reset context pointer * * do VF FLR and reinitialize Asic * return 0 means succeeded otherwise failed */ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, - bool from_hypervisor) + struct amdgpu_reset_context *reset_context) { int r; struct amdgpu_hive_info *hive = NULL; - int retry_limit = 0; -retry: - amdgpu_amdkfd_pre_reset(adev); - - if (from_hypervisor) + if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) { + if (!amdgpu_ras_get_fed_status(adev)) + amdgpu_virt_ready_to_reset(adev); + amdgpu_virt_wait_reset(adev); + clear_bit(AMDGPU_HOST_FLR, &reset_context->flags); r = amdgpu_virt_request_full_gpu(adev, true); - else + } else { r = amdgpu_virt_reset_gpu(adev); + } if (r) return r; + + amdgpu_ras_clear_err_state(adev); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ @@ -4982,7 +5392,7 @@ retry: /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) - goto error; + return r; amdgpu_virt_init_data_exchange(adev); @@ -4993,51 +5403,56 @@ retry: /* now we are okay to resume SMC/CP/SDMA */ r = amdgpu_device_ip_reinit_late_sriov(adev); if (r) - goto error; + return r; hive = amdgpu_get_xgmi_hive(adev); /* Update PSP FW topology after reset */ if (hive && adev->gmc.xgmi.num_physical_nodes > 1) r = amdgpu_xgmi_update_topology(hive, adev); - if (hive) amdgpu_put_xgmi_hive(hive); + if (r) + return r; - if (!r) { - r = amdgpu_ib_ring_tests(adev); - - amdgpu_amdkfd_post_reset(adev); - } + r = amdgpu_ib_ring_tests(adev); + if (r) + return r; -error: - if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) amdgpu_inc_vram_lost(adev); - r = amdgpu_device_recover_vram(adev); - } + + /* need to be called during full access so we can't do it later like + * bare-metal does. + */ + amdgpu_amdkfd_post_reset(adev); amdgpu_virt_release_full_gpu(adev, true); - if (AMDGPU_RETRY_SRIOV_RESET(r)) { - if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) { - retry_limit++; - goto retry; - } else - DRM_ERROR("GPU reset retry is beyond the retry limit\n"); - } + /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) + amdgpu_ras_resume(adev); - return r; + amdgpu_virt_ras_telemetry_post_reset(adev); + + return 0; } /** - * amdgpu_device_has_job_running - check if there is any job in mirror list + * amdgpu_device_has_job_running - check if there is any unfinished job * * @adev: amdgpu_device pointer * - * check if there is any job in mirror list + * check if there is any job running on the device when guest driver receives + * FLR notification from host driver. If there are still jobs running, then + * the guest driver will not respond the FLR reset. Instead, let the job hit + * the timeout and guest driver then issue the reset request. */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) { int i; - struct drm_sched_job *job; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -5045,11 +5460,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) if (!amdgpu_ring_sched_ready(ring)) continue; - spin_lock(&ring->sched.job_list_lock); - job = list_first_entry_or_null(&ring->sched.pending_list, - struct drm_sched_job, list); - spin_unlock(&ring->sched.job_list_lock); - if (job) + if (amdgpu_fence_count_emitted(ring)) return true; } return false; @@ -5111,15 +5522,19 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) u32 i; int ret = 0; - amdgpu_atombios_scratch_regs_engine_hung(adev, true); + if (adev->bios) + amdgpu_atombios_scratch_regs_engine_hung(adev, true); dev_info(adev->dev, "GPU mode1 reset\n"); + /* Cache the state before bus master disable. The saved config space + * values are used in other cases like restore after mode-2 reset. + */ + amdgpu_device_cache_pci_state(adev->pdev); + /* disable BM */ pci_clear_master(adev->pdev); - amdgpu_device_cache_pci_state(adev->pdev); - if (amdgpu_dpm_is_mode1_reset_supported(adev)) { dev_info(adev->dev, "GPU smu mode1 reset\n"); ret = amdgpu_dpm_mode1_reset(adev); @@ -5150,7 +5565,8 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) goto mode1_reset_failed; } - amdgpu_atombios_scratch_regs_engine_hung(adev, false); + if (adev->bios) + amdgpu_atombios_scratch_regs_engine_hung(adev, false); return 0; @@ -5159,21 +5575,43 @@ mode1_reset_failed: return ret; } +int amdgpu_device_link_reset(struct amdgpu_device *adev) +{ + int ret = 0; + + dev_info(adev->dev, "GPU link reset\n"); + + if (!adev->pcie_reset_ctx.occurs_dpc) + ret = amdgpu_dpm_link_reset(adev); + + if (ret) + goto link_reset_failed; + + ret = amdgpu_psp_wait_for_bootloader(adev); + if (ret) + goto link_reset_failed; + + return 0; + +link_reset_failed: + dev_err(adev->dev, "GPU link reset failed\n"); + return ret; +} + int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { int i, r = 0; struct amdgpu_job *job = NULL; + struct amdgpu_device *tmp_adev = reset_context->reset_req_dev; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); if (reset_context->reset_req_dev == adev) job = reset_context->job; - if (amdgpu_sriov_vf(adev)) { - /* stop the data exchange thread */ - amdgpu_virt_fini_data_exchange(adev); - } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_pre_reset(adev); amdgpu_fence_driver_isr_toggle(adev, true); @@ -5222,6 +5660,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, } } + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { + dev_info(tmp_adev->dev, "Dumping IP State\n"); + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]); + dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); + } + if (need_full_reset) r = amdgpu_device_ip_suspend(adev); if (need_full_reset) @@ -5234,94 +5682,35 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, return r; } -static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) +int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) { - int i; + struct list_head *device_list_handle; + bool full_reset, vram_lost = false; + struct amdgpu_device *tmp_adev; + int r, init_level; - lockdep_assert_held(&adev->reset_domain->sem); + device_list_handle = reset_context->reset_device_list; - for (i = 0; i < adev->reset_info.num_regs; i++) { - adev->reset_info.reset_dump_reg_value[i] = - RREG32(adev->reset_info.reset_dump_reg_list[i]); - - trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], - adev->reset_info.reset_dump_reg_value[i]); - } - - return 0; -} - -int amdgpu_do_asic_reset(struct list_head *device_list_handle, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_device *tmp_adev = NULL; - bool need_full_reset, skip_hw_reset, vram_lost = false; - int r = 0; - - /* Try reset handler method first */ - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); - amdgpu_reset_reg_dumps(tmp_adev); - - reset_context->reset_device_list = device_list_handle; - r = amdgpu_reset_perform_reset(tmp_adev, reset_context); - /* If reset handler not implemented, continue; otherwise return */ - if (r == -EOPNOTSUPP) - r = 0; - else - return r; + if (!device_list_handle) + return -EINVAL; - /* Reset handler not implemented, use the default method */ - need_full_reset = - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); - skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); + full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); - /* - * ASIC reset has to be done on all XGMI hive nodes ASAP - * to allow proper links negotiation in FW (within 1 sec) + /** + * If it's reset on init, it's default init level, otherwise keep level + * as recovery level. */ - if (!skip_hw_reset && need_full_reset) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - /* For XGMI run all resets in parallel to speed up the process */ - if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - tmp_adev->gmc.xgmi.pending_reset = false; - if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) - r = -EALREADY; - } else - r = amdgpu_asic_reset(tmp_adev); - - if (r) { - dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s", - r, adev_to_drm(tmp_adev)->unique); - goto out; - } - } - - /* For XGMI wait for all resets to complete before proceed */ - if (!r) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - flush_work(&tmp_adev->xgmi_reset_work); - r = tmp_adev->asic_reset_res; - if (r) - break; - } - } - } - } - - if (!r && amdgpu_ras_intr_triggered()) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB); - } - - amdgpu_ras_intr_cleared(); - } + if (reset_context->method == AMD_RESET_METHOD_ON_INIT) + init_level = AMDGPU_INIT_LEVEL_DEFAULT; + else + init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY; + r = 0; list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (need_full_reset) { + amdgpu_set_init_level(tmp_adev, init_level); + if (full_reset) { /* post card */ - amdgpu_ras_set_fed(tmp_adev, false); + amdgpu_ras_clear_err_state(tmp_adev); r = amdgpu_device_asic_init(tmp_adev); if (r) { dev_warn(tmp_adev->dev, "asic atom init failed!"); @@ -5334,7 +5723,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); - amdgpu_coredump(tmp_adev, vram_lost, reset_context); + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); @@ -5357,6 +5747,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (tmp_adev->mman.buffer_funcs_ring->sched.ready) amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true); + r = amdgpu_device_ip_resume_phase3(tmp_adev); + if (r) + goto out; + if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); @@ -5374,7 +5768,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (r) goto out; - drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false); + drm_client_dev_resume(adev_to_drm(tmp_adev), false); /* * The GPU enters bad state once faulty pages @@ -5386,7 +5780,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, * bad_page_threshold value to fix this once * probing driver again. */ - if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { + if (!amdgpu_ras_is_rma(tmp_adev)) { /* must succeed. */ amdgpu_ras_resume(tmp_adev); } else { @@ -5404,27 +5798,102 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, out: if (!r) { + /* IP init is complete now, set level as default */ + amdgpu_set_init_level(tmp_adev, + AMDGPU_INIT_LEVEL_DEFAULT); amdgpu_irq_gpu_reset_resume_helper(tmp_adev); r = amdgpu_ib_ring_tests(tmp_adev); if (r) { dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); - need_full_reset = true; r = -EAGAIN; goto end; } } - if (!r) - r = amdgpu_device_recover_vram(tmp_adev); - else + if (r) tmp_adev->asic_reset_res = r; } end: - if (need_full_reset) + return r; +} + +int amdgpu_do_asic_reset(struct list_head *device_list_handle, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *tmp_adev = NULL; + bool need_full_reset, skip_hw_reset; + int r = 0; + + /* Try reset handler method first */ + tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + reset_list); + + reset_context->reset_device_list = device_list_handle; + r = amdgpu_reset_perform_reset(tmp_adev, reset_context); + /* If reset handler not implemented, continue; otherwise return */ + if (r == -EOPNOTSUPP) + r = 0; + else + return r; + + /* Reset handler not implemented, use the default method */ + need_full_reset = + test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); + + /* + * ASIC reset has to be done on all XGMI hive nodes ASAP + * to allow proper links negotiation in FW (within 1 sec) + */ + if (!skip_hw_reset && need_full_reset) { + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + /* For XGMI run all resets in parallel to speed up the process */ + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + if (!queue_work(system_unbound_wq, + &tmp_adev->xgmi_reset_work)) + r = -EALREADY; + } else + r = amdgpu_asic_reset(tmp_adev); + + if (r) { + dev_err(tmp_adev->dev, + "ASIC reset failed with error, %d for drm dev, %s", + r, adev_to_drm(tmp_adev)->unique); + goto out; + } + } + + /* For XGMI wait for all resets to complete before proceed */ + if (!r) { + list_for_each_entry(tmp_adev, device_list_handle, + reset_list) { + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + flush_work(&tmp_adev->xgmi_reset_work); + r = tmp_adev->asic_reset_res; + if (r) + break; + } + } + } + } + + if (!r && amdgpu_ras_intr_triggered()) { + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + amdgpu_ras_reset_error_count(tmp_adev, + AMDGPU_RAS_BLOCK__MMHUB); + } + + amdgpu_ras_intr_cleared(); + } + + r = amdgpu_device_reinit_after_reset(reset_context); + if (r == -EAGAIN) set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); else clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + +out: return r; } @@ -5433,6 +5902,7 @@ static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: + case AMD_RESET_METHOD_LINK: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; break; case AMD_RESET_METHOD_MODE2: @@ -5532,85 +6002,93 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) } -/** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler - * - * @adev: amdgpu_device pointer - * @job: which job trigger hang - * @reset_context: amdgpu reset context pointer - * - * Attempt to reset the GPU if it has hung (all asics). - * Attempt to do soft-reset or full-reset and reinitialize Asic - * Returns 0 for success or an error on failure. - */ - -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context) +static int amdgpu_device_health_check(struct list_head *device_list_handle) { - struct list_head device_list, *device_list_handle = NULL; - bool job_signaled = false; - struct amdgpu_hive_info *hive = NULL; - struct amdgpu_device *tmp_adev = NULL; - int i, r = 0; - bool need_emergency_restart = false; - bool audio_suspended = false; - - /* - * Special case: RAS triggered and full reset isn't supported - */ - need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); - - /* - * Flush RAM to disk so that after reboot - * the user can read log and see why the system rebooted. - */ - if (need_emergency_restart && amdgpu_ras_get_context(adev) && - amdgpu_ras_get_context(adev)->reboot) { - DRM_WARN("Emergency reboot."); + struct amdgpu_device *tmp_adev; + int ret = 0; + u32 status; - ksys_sync_helper(); - emergency_restart(); + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status); + if (PCI_POSSIBLE_ERROR(status)) { + dev_err(tmp_adev->dev, "device lost from bus!"); + ret = -ENODEV; + } } - dev_info(adev->dev, "GPU %s begin!\n", - need_emergency_restart ? "jobs stop":"reset"); + return ret; +} - if (!amdgpu_sriov_vf(adev)) - hive = amdgpu_get_xgmi_hive(adev); - if (hive) - mutex_lock(&hive->hive_lock); +static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_hive_info *hive) +{ + struct amdgpu_device *tmp_adev = NULL; + int r; - reset_context->job = job; - reset_context->hive = hive; /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list * to put adev in the 1st position. */ - INIT_LIST_HEAD(&device_list); - if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { + if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - list_add_tail(&tmp_adev->reset_list, &device_list); + list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->pcie_reset_ctx.in_link_reset = true; } - if (!list_is_first(&adev->reset_list, &device_list)) - list_rotate_to_front(&adev->reset_list, &device_list); - device_list_handle = &device_list; + if (!list_is_first(&adev->reset_list, device_list)) + list_rotate_to_front(&adev->reset_list, device_list); } else { - list_add_tail(&adev->reset_list, &device_list); - device_list_handle = &device_list; + list_add_tail(&adev->reset_list, device_list); } - /* We need to lock reset domain only once both for XGMI and single device */ - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); + if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { + r = amdgpu_device_health_check(device_list); + if (r) + return r; + } + + return 0; +} + +static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); +} - /* block all schedulers and reset given job's ring */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { +static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} + +static int amdgpu_device_halt_activities( + struct amdgpu_device *adev, struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, struct amdgpu_hive_info *hive, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; + + /* block all schedulers and reset given job's ring */ + list_for_each_entry(tmp_adev, device_list, reset_list) { amdgpu_device_set_mp1_state(tmp_adev); /* @@ -5624,25 +6102,25 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * some audio codec errors. */ if (!amdgpu_device_suspend_display_audio(tmp_adev)) - audio_suspended = true; + tmp_adev->pcie_reset_ctx.audio_suspended = true; amdgpu_ras_set_error_query_ready(tmp_adev, false); cancel_delayed_work_sync(&tmp_adev->delayed_init_work); - if (!amdgpu_sriov_vf(tmp_adev)) - amdgpu_amdkfd_pre_reset(tmp_adev); + amdgpu_amdkfd_pre_reset(tmp_adev, reset_context); /* - * Mark these ASICs to be reseted as untracked first + * Mark these ASICs to be reset as untracked first * And add them back after reset completed */ amdgpu_unregister_gpu_instance(tmp_adev); - drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true); + drm_client_dev_suspend(adev_to_drm(tmp_adev), false); /* disable ras on ALL IPs */ if (!need_emergency_restart && + (!adev->pcie_reset_ctx.occurs_dpc) && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); @@ -5660,61 +6138,81 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, atomic_inc(&tmp_adev->gpu_reset_counter); } - if (need_emergency_restart) - goto skip_sched_resume; + return r; +} - /* - * Must check guilty signal here since after this point all old - * HW fences are force signaled. - * - * job->base holds a reference to parent fence - */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { - job_signaled = true; - dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); - goto skip_hw_reset; - } +static int amdgpu_device_asic_reset(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *tmp_adev = NULL; + int retry_limit = AMDGPU_MAX_RETRY_LIMIT; + int r = 0; retry: /* Rest of adevs pre asic reset from XGMI hive. */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = false; /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", r, adev_to_drm(tmp_adev)->unique); tmp_adev->asic_reset_res = r; } - - /* - * Drop all pending non scheduler resets. Scheduler resets - * were already dropped during drm_sched_stop - */ - amdgpu_device_stop_pending_resets(tmp_adev); } /* Actual ASIC resets if needed.*/ /* Host driver will handle XGMI hive reset for SRIOV */ if (amdgpu_sriov_vf(adev)) { - r = amdgpu_device_reset_sriov(adev, job ? false : true); + + /* Bail out of reset early */ + if (amdgpu_ras_is_rma(adev)) + return -ENODEV; + + if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) { + dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n"); + amdgpu_ras_set_fed(adev, true); + set_bit(AMDGPU_HOST_FLR, &reset_context->flags); + } + + r = amdgpu_device_reset_sriov(adev, reset_context); + if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) { + amdgpu_virt_release_full_gpu(adev, true); + goto retry; + } if (r) adev->asic_reset_res = r; - - /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ - if (amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) - amdgpu_ras_resume(adev); } else { - r = amdgpu_do_asic_reset(device_list_handle, reset_context); + r = amdgpu_do_asic_reset(device_list, reset_context); if (r && r == -EAGAIN) goto retry; } -skip_hw_reset: + list_for_each_entry(tmp_adev, device_list, reset_list) { + /* + * Drop any pending non scheduler resets queued before reset is done. + * Any reset scheduled after this point would be valid. Scheduler resets + * were already dropped during drm_sched_stop and no new ones can come + * in before drm_sched_start. + */ + amdgpu_device_stop_pending_resets(tmp_adev); + } + + return r; +} + +static int amdgpu_device_sched_resume(struct list_head *device_list, + struct amdgpu_reset_context *reset_context, + bool job_signaled) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; /* Post ASIC reset for all devs .*/ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -5722,7 +6220,7 @@ skip_hw_reset: if (!amdgpu_ring_sched_ready(ring)) continue; - drm_sched_start(&ring->sched, true); + drm_sched_start(&ring->sched, 0); } if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) @@ -5734,8 +6232,14 @@ skip_hw_reset: tmp_adev->asic_reset_res = 0; if (r) { - /* bad news, how to tell it to userspace ? */ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); + /* bad news, how to tell it to userspace ? + * for ras error, we should report GPU bad status instead of + * reset failure + */ + if (reset_context->src != AMDGPU_RESET_SRC_RAS || + !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", + atomic_read(&tmp_adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); @@ -5744,8 +6248,16 @@ skip_hw_reset: } } -skip_sched_resume: - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + return r; +} + +static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, + struct list_head *device_list, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + + list_for_each_entry(tmp_adev, device_list, reset_list) { /* unlock kfd: SRIOV would do it separately */ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); @@ -5756,18 +6268,118 @@ skip_sched_resume: if (!adev->kfd.init_complete) amdgpu_amdkfd_device_init(adev); - if (audio_suspended) + if (tmp_adev->pcie_reset_ctx.audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); amdgpu_device_unset_mp1_state(tmp_adev); amdgpu_ras_set_error_query_ready(tmp_adev, true); + } +} - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); - amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +/** + * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * + * @adev: amdgpu_device pointer + * @job: which job trigger hang + * @reset_context: amdgpu reset context pointer + * + * Attempt to reset the GPU if it has hung (all asics). + * Attempt to do soft-reset or full-reset and reinitialize Asic + * Returns 0 for success or an error on failure. + */ + +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context) +{ + struct list_head device_list; + bool job_signaled = false; + struct amdgpu_hive_info *hive = NULL; + int r = 0; + bool need_emergency_restart = false; + + /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. + */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + !amdgpu_sriov_vf(adev) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } + + /* + * Special case: RAS triggered and full reset isn't supported + */ + need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ + if (need_emergency_restart && amdgpu_ras_get_context(adev) && + amdgpu_ras_get_context(adev)->reboot) { + DRM_WARN("Emergency reboot."); + + ksys_sync_helper(); + emergency_restart(); + } + + dev_info(adev->dev, "GPU %s begin!\n", + need_emergency_restart ? "jobs stop":"reset"); + + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); + if (hive) + mutex_lock(&hive->hive_lock); + + reset_context->job = job; + reset_context->hive = hive; + INIT_LIST_HEAD(&device_list); + + if (amdgpu_device_recovery_prepare(adev, &device_list, hive)) + goto end_reset; + + /* We need to lock reset domain only once both for XGMI and single device */ + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + + r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list, + hive, need_emergency_restart); + if (r) + goto reset_unlock; + + if (need_emergency_restart) + goto skip_sched_resume; + /* + * Must check guilty signal here since after this point all old + * HW fences are force signaled. + * + * job->base holds a reference to parent fence + */ + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { + job_signaled = true; + dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); + goto skip_hw_reset; + } + + r = amdgpu_device_asic_reset(adev, &device_list, reset_context); + if (r) + goto reset_unlock; +skip_hw_reset: + r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); + if (r) + goto reset_unlock; +skip_sched_resume: + amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); +reset_unlock: + amdgpu_device_recovery_put_reset_lock(adev, &device_list); +end_reset: if (hive) { mutex_unlock(&hive->hive_lock); amdgpu_put_xgmi_hive(hive); @@ -5777,6 +6389,10 @@ skip_sched_resume: dev_info(adev->dev, "GPU reset end with ret = %d\n", r); atomic_set(&adev->reset_domain->reset_res, r); + + if (!r) + drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE); + return r; } @@ -5803,13 +6419,56 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - while ((parent = pci_upstream_bridge(parent))) { - /* skip upstream/downstream switches internal to dGPU*/ - if (parent->vendor == PCI_VENDOR_ID_ATI) - continue; + if (amdgpu_device_pcie_dynamic_switching_supported(adev)) { + while ((parent = pci_upstream_bridge(parent))) { + /* skip upstream/downstream switches internal to dGPU*/ + if (parent->vendor == PCI_VENDOR_ID_ATI) + continue; + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + break; + } + } else { + /* use the current speeds rather than max if switching is not supported */ + pcie_bandwidth_available(adev->pdev, NULL, speed, width); + } +} + +/** + * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU + * + * @adev: amdgpu_device pointer + * @speed: pointer to the speed of the link + * @width: pointer to the width of the link + * + * Evaluate the hierarchy to find the speed and bandwidth capabilities of the + * AMD dGPU which may be a virtual upstream bridge. + */ +static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *parent = adev->pdev; + + if (!speed || !width) + return; + + parent = pci_upstream_bridge(parent); + if (parent && parent->vendor == PCI_VENDOR_ID_ATI) { + /* use the upstream/downstream switches internal to dGPU */ *speed = pcie_get_speed_cap(parent); *width = pcie_get_width_cap(parent); - break; + while ((parent = pci_upstream_bridge(parent))) { + if (parent->vendor == PCI_VENDOR_ID_ATI) { + /* use the upstream/downstream switches internal to dGPU */ + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + } + } + } else { + /* use the device itself */ + *speed = pcie_get_speed_cap(adev->pdev); + *width = pcie_get_width_cap(adev->pdev); } } @@ -5818,15 +6477,14 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * - * Fetchs and stores in the driver the PCIE capabilities (gen speed + * Fetches and stores in the driver the PCIE capabilities (gen speed * and lanes) of the slot the device is in. Handles APUs and * virtualized environments where PCIE config space may not be available. */ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { - struct pci_dev *pdev; enum pci_bus_speed speed_cap, platform_speed_cap; - enum pcie_link_width platform_link_width; + enum pcie_link_width platform_link_width, link_width; if (amdgpu_pcie_gen_cap) adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -5848,11 +6506,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) amdgpu_device_partner_bandwidth(adev, &platform_speed_cap, &platform_link_width); + amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ - pdev = adev->pdev; - speed_cap = pcie_get_speed_cap(pdev); if (speed_cap == PCI_SPEED_UNKNOWN) { adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | @@ -5908,51 +6565,103 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } if (adev->pm.pcie_mlw_mask == 0) { + /* asic caps */ + if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { + adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK; + } else { + switch (link_width) { + case PCIE_LNK_X32: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X16: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X12: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X8: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X4: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X2: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X1: + adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1; + break; + default: + break; + } + } + /* platform caps */ if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; } else { switch (platform_link_width) { case PCIE_LNK_X32: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X16: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X12: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X8: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X4: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X2: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X1: - adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; + adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; break; default: break; @@ -5975,18 +6684,27 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev) { #ifdef CONFIG_HSA_AMD_P2P - uint64_t address_mask = peer_adev->dev->dma_mask ? - ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); + if (!p2p_access) + dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n", + pci_name(peer_adev->pdev)); + + bool is_large_bar = adev->gmc.visible_vram_size && + adev->gmc.real_vram_size == adev->gmc.visible_vram_size; + bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); - return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && - adev->gmc.real_vram_size == adev->gmc.visible_vram_size && - !(adev->gmc.aper_base & address_mask || - aper_limit & address_mask)); + if (!p2p_addressable) { + uint64_t address_mask = peer_adev->dev->dma_mask ? + ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); + resource_size_t aper_limit = + adev->gmc.aper_base + adev->gmc.aper_size - 1; + + p2p_addressable = !(adev->gmc.aper_base & address_mask || + aper_limit & address_mask); + } + return pcie_p2p && is_large_bar && p2p_access && p2p_addressable; #else return false; #endif @@ -6024,7 +6742,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev) adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); - if (amdgpu_passthrough(adev) && + if (amdgpu_passthrough(adev) && adev->nbio.funcs && adev->nbio.funcs->clear_doorbell_interrupt) adev->nbio.funcs->clear_doorbell_interrupt(adev); @@ -6044,12 +6762,15 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + struct amdgpu_reset_context reset_context; + struct list_head device_list; + int r = 0; - DRM_INFO("PCI error: detected callback, state(%d)!!\n", state); + dev_info(adev->dev, "PCI error: detected callback!!\n"); - if (adev->gmc.xgmi.num_physical_nodes > 1) { - DRM_WARN("No support for XGMI hive yet..."); + if (!amdgpu_dpm_is_link_reset_supported(adev)) { + dev_warn(adev->dev, "No support for XGMI hive yet...\n"); return PCI_ERS_RESULT_DISCONNECT; } @@ -6057,32 +6778,32 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta switch (state) { case pci_channel_io_normal: + dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state); return PCI_ERS_RESULT_CAN_RECOVER; - /* Fatal error, prepare for slot reset */ case pci_channel_io_frozen: - /* - * Locking adev->reset_domain->sem will prevent any external access - * to GPU during PCI error recovery - */ - amdgpu_device_lock_reset_domain(adev->reset_domain); - amdgpu_device_set_mp1_state(adev); - - /* - * Block any work scheduling as we do for regular GPU reset - * for the duration of the recovery - */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!amdgpu_ring_sched_ready(ring)) - continue; - - drm_sched_stop(&ring->sched, NULL); + /* Fatal error, prepare for slot reset */ + dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state); + + if (hive) + mutex_lock(&hive->hive_lock); + adev->pcie_reset_ctx.occurs_dpc = true; + memset(&reset_context, 0, sizeof(reset_context)); + INIT_LIST_HEAD(&device_list); + + amdgpu_device_recovery_prepare(adev, &device_list, hive); + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, + hive, false); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } - atomic_inc(&adev->gpu_reset_counter); + if (r) + return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */ + dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state); return PCI_ERS_RESULT_DISCONNECT; } @@ -6095,8 +6816,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta */ pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) { + struct drm_device *dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(dev); - DRM_INFO("PCI error: mmio enabled callback!!\n"); + dev_info(adev->dev, "PCI error: mmio enabled callback!!\n"); /* TODO - dump whatever for debugging purposes */ @@ -6120,34 +6843,25 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int r, i; struct amdgpu_reset_context reset_context; - u32 memsize; - struct list_head device_list; + struct amdgpu_device *tmp_adev; struct amdgpu_hive_info *hive; - int hive_ras_recovery = 0; - struct amdgpu_ras *ras; + struct list_head device_list; + int r = 0, i; + u32 memsize; /* PCI error slot reset should be skipped During RAS recovery */ - hive = amdgpu_get_xgmi_hive(adev); - if (hive) { - hive_ras_recovery = atomic_read(&hive->ras_recovery); - amdgpu_put_xgmi_hive(hive); - } - ras = amdgpu_ras_get_context(adev); - if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && - ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) && + amdgpu_ras_in_recovery(adev)) return PCI_ERS_RESULT_RECOVERED; - DRM_INFO("PCI error: slot reset callback!!\n"); + dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); - INIT_LIST_HEAD(&device_list); - list_add_tail(&adev->reset_list, &device_list); - /* wait for asic to come out of reset */ - msleep(500); + msleep(700); /* Restore PCI confspace */ amdgpu_device_load_pci_state(pdev); @@ -6168,26 +6882,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - - adev->no_hw_access = true; - r = amdgpu_device_pre_asic_reset(adev, &reset_context); - adev->no_hw_access = false; - if (r) - goto out; + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); + INIT_LIST_HEAD(&device_list); - r = amdgpu_do_asic_reset(&device_list, &reset_context); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + reset_context.hive = hive; + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = true; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else { + set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + list_add_tail(&adev->reset_list, &device_list); + } + r = amdgpu_device_asic_reset(adev, &device_list, &reset_context); out: if (!r) { if (amdgpu_device_cache_pci_state(adev->pdev)) pci_restore_state(adev->pdev); - - DRM_INFO("PCIe error recovery succeeded\n"); + dev_info(adev->dev, "PCIe error recovery succeeded\n"); } else { - DRM_ERROR("PCIe error recovery failed, err:%d", r); - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r); + if (hive) { + list_for_each_entry(tmp_adev, &device_list, reset_list) + amdgpu_device_unset_mp1_state(tmp_adev); + } + amdgpu_device_recovery_put_reset_lock(adev, &device_list); + } + + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -6204,26 +6932,37 @@ void amdgpu_pci_resume(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; - + struct list_head device_list; + struct amdgpu_hive_info *hive = NULL; + struct amdgpu_device *tmp_adev = NULL; - DRM_INFO("PCI error: resume callback!!\n"); + dev_info(adev->dev, "PCI error: resume callback!!\n"); /* Only continue execution for the case of pci_channel_io_frozen */ if (adev->pci_channel_state != pci_channel_io_frozen) return; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + INIT_LIST_HEAD(&device_list); - if (!amdgpu_ring_sched_ready(ring)) - continue; + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = false; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else + list_add_tail(&adev->reset_list, &device_list); - drm_sched_start(&ring->sched, true); - } + amdgpu_device_sched_resume(&device_list, NULL, NULL); + amdgpu_device_gpu_resume(adev, &device_list, false); + amdgpu_device_recovery_put_reset_lock(adev, &device_list); + adev->pcie_reset_ctx.occurs_dpc = false; - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); + } } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) @@ -6232,6 +6971,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) struct amdgpu_device *adev = drm_to_adev(dev); int r; + if (amdgpu_sriov_vf(adev)) + return false; + r = pci_save_state(pdev); if (!r) { kfree(adev->pci_state); @@ -6379,6 +7121,22 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, } /** + * amdgpu_device_get_gang - return a reference to the current gang + * @adev: amdgpu_device pointer + * + * Returns: A new reference to the current gang leader. + */ +struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev) +{ + struct dma_fence *fence; + + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&adev->gang_submit); + rcu_read_unlock(); + return fence; +} + +/** * amdgpu_device_switch_gang - switch to a new gang * @adev: amdgpu_device pointer * @gang: the gang to switch to @@ -6392,25 +7150,117 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, { struct dma_fence *old = NULL; + dma_fence_get(gang); do { dma_fence_put(old); - rcu_read_lock(); - old = dma_fence_get_rcu_safe(&adev->gang_submit); - rcu_read_unlock(); - + old = amdgpu_device_get_gang(adev); if (old == gang) break; - if (!dma_fence_is_signaled(old)) + if (!dma_fence_is_signaled(old)) { + dma_fence_put(gang); return old; + } } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, old, gang) != old); + /* + * Drop it once for the exchanged reference in adev and once for the + * thread local reference acquired in amdgpu_device_get_gang(). + */ + dma_fence_put(old); dma_fence_put(old); return NULL; } +/** + * amdgpu_device_enforce_isolation - enforce HW isolation + * @adev: the amdgpu device pointer + * @ring: the HW ring the job is supposed to run on + * @job: the job which is about to be pushed to the HW ring + * + * Makes sure that only one client at a time can use the GFX block. + * Returns: The dependency to wait on before the job can be pushed to the HW. + * The function is called multiple times until NULL is returned. + */ +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_job *job) +{ + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; + struct drm_sched_fence *f = job->base.s_fence; + struct dma_fence *dep; + void *owner; + int r; + + /* + * For now enforce isolation only for the GFX block since we only need + * the cleaner shader on those rings. + */ + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX && + ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) + return NULL; + + /* + * All submissions where enforce isolation is false are handled as if + * they come from a single client. Use ~0l as the owner to distinct it + * from kernel submissions where the owner is NULL. + */ + owner = job->enforce_isolation ? f->owner : (void *)~0l; + + mutex_lock(&adev->enforce_isolation_mutex); + + /* + * The "spearhead" submission is the first one which changes the + * ownership to its client. We always need to wait for it to be + * pushed to the HW before proceeding with anything. + */ + if (&f->scheduled != isolation->spearhead && + !dma_fence_is_signaled(isolation->spearhead)) { + dep = isolation->spearhead; + goto out_grab_ref; + } + + if (isolation->owner != owner) { + + /* + * Wait for any gang to be assembled before switching to a + * different owner or otherwise we could deadlock the + * submissions. + */ + if (!job->gang_submit) { + dep = amdgpu_device_get_gang(adev); + if (!dma_fence_is_signaled(dep)) + goto out_return_dep; + dma_fence_put(dep); + } + + dma_fence_put(isolation->spearhead); + isolation->spearhead = dma_fence_get(&f->scheduled); + amdgpu_sync_move(&isolation->active, &isolation->prev); + trace_amdgpu_isolation(isolation->owner, owner); + isolation->owner = owner; + } + + /* + * Specifying the ring here helps to pipeline submissions even when + * isolation is enabled. If that is not desired for testing NULL can be + * used instead of the ring to enforce a CPU round trip while switching + * between clients. + */ + dep = amdgpu_sync_peek_fence(&isolation->prev, ring); + r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT); + if (r) + DRM_WARN("OOM tracking isolation\n"); + +out_grab_ref: + dma_fence_get(dep); +out_return_dep: + mutex_unlock(&adev->enforce_isolation_mutex); + return dep; +} + bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -6479,3 +7329,47 @@ uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, } return ret; } + +ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring) +{ + ssize_t size = 0; + + if (!ring || !ring->adev) + return size; + + if (amdgpu_device_should_recover_gpu(ring->adev)) + size |= AMDGPU_RESET_TYPE_FULL; + + if (unlikely(!ring->adev->debug_disable_soft_recovery) && + !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery) + size |= AMDGPU_RESET_TYPE_SOFT_RESET; + + return size; +} + +ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset) +{ + ssize_t size = 0; + + if (supported_reset == 0) { + size += sysfs_emit_at(buf, size, "unsupported"); + size += sysfs_emit_at(buf, size, "\n"); + return size; + + } + + if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET) + size += sysfs_emit_at(buf, size, "soft "); + + if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) + size += sysfs_emit_at(buf, size, "queue "); + + if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE) + size += sysfs_emit_at(buf, size, "pipe "); + + if (supported_reset & AMDGPU_RESET_TYPE_FULL) + size += sysfs_emit_at(buf, size, "full "); + + size += sysfs_emit_at(buf, size, "\n"); + return size; +} |