summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorMaxime Ripard <mripard@kernel.org>2023-11-15 10:45:19 +0100
committerMaxime Ripard <mripard@kernel.org>2023-11-15 10:56:44 +0100
commit3bf3e21c15d4386a5f15118ec39bbc1b67ea5759 (patch)
treea8880bb7b152d913ebd86e7cd858600dbe22ed38 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent34b98a5f7a185c19715cc98c57d7e27b4785dfdf (diff)
parentb85ea95d086471afb4ad062012a4d73cd328fa86 (diff)
Merge drm/drm-next into drm-misc-next
Let's kickstart the v6.8 release cycle. Signed-off-by: Maxime Ripard <mripard@kernel.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c606
1 files changed, 457 insertions, 149 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 203dfcea82b1..2a6684a38714 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -32,8 +32,6 @@
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
-#include <linux/devcoredump.h>
-#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>
@@ -43,6 +41,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
+#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
@@ -74,6 +73,7 @@
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
+#include "amdgpu_virt.h"
#include <linux/suspend.h>
#include <drm/task_barrier.h>
@@ -162,6 +162,74 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
static DEVICE_ATTR(pcie_replay_count, 0444,
amdgpu_device_get_pcie_replay_count, NULL);
+/**
+ * DOC: board_info
+ *
+ * The amdgpu driver provides a sysfs API for giving board related information.
+ * It provides the form factor information in the format
+ *
+ * type : form factor
+ *
+ * Possible form factor values
+ *
+ * - "cem" - PCIE CEM card
+ * - "oam" - Open Compute Accelerator Module
+ * - "unknown" - Not known
+ *
+ */
+
+static ssize_t amdgpu_device_get_board_info(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
+ const char *pkg;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
+ pkg_type = adev->smuio.funcs->get_pkg_type(adev);
+
+ switch (pkg_type) {
+ case AMDGPU_PKG_TYPE_CEM:
+ pkg = "cem";
+ break;
+ case AMDGPU_PKG_TYPE_OAM:
+ pkg = "oam";
+ break;
+ default:
+ pkg = "unknown";
+ break;
+ }
+
+ return sysfs_emit(buf, "%s : %s\n", "type", pkg);
+}
+
+static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
+
+static struct attribute *amdgpu_board_attrs[] = {
+ &dev_attr_board_info.attr,
+ NULL,
+};
+
+static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group amdgpu_board_attrs_group = {
+ .attrs = amdgpu_board_attrs,
+ .is_visible = amdgpu_board_attrs_is_visible
+};
+
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
@@ -405,7 +473,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(&adev->reset_domain->sem)) {
- ret = amdgpu_kiq_rreg(adev, reg);
+ ret = amdgpu_kiq_rreg(adev, reg, 0);
up_read(&adev->reset_domain->sem);
} else {
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -442,6 +510,49 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
BUG();
}
+
+/**
+ * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags,
+ uint32_t xcc_id)
+{
+ uint32_t ret, rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, false,
+ &rlcg_flag)) {
+ ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ ret = adev->pcie_rreg(adev, reg * 4);
+ }
+
+ return ret;
+}
+
/*
* MMIO register write with bytes helper functions
* @offset:bytes offset from MMIO start
@@ -489,7 +600,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(&adev->reset_domain->sem)) {
- amdgpu_kiq_wreg(adev, reg, v);
+ amdgpu_kiq_wreg(adev, reg, v, 0);
up_read(&adev->reset_domain->sem);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
@@ -507,6 +618,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
* @reg: mmio/rlc register
* @v: value to write
+ * @xcc_id: xcc accelerated compute core id
*
* this function is invoked only for the debugfs register access
*/
@@ -530,6 +642,47 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
}
/**
+ * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags, uint32_t xcc_id)
+{
+ uint32_t rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, true,
+ &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ adev->pcie_wreg(adev, reg * 4, v);
+ }
+}
+
+/**
* amdgpu_device_indirect_rreg - read an indirect register
*
* @adev: amdgpu_device pointer
@@ -571,7 +724,7 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if (adev->nbio.funcs->get_pcie_index_hi_offset)
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
else
pcie_index_hi = 0;
@@ -638,6 +791,56 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
return r;
}
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /* read low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r |= ((u64)readl(pcie_data_offset) << 32);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
/**
* amdgpu_device_indirect_wreg - write an indirect register address
*
@@ -677,7 +880,7 @@ void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if (adev->nbio.funcs->get_pcie_index_hi_offset)
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
else
pcie_index_hi = 0;
@@ -742,6 +945,55 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
/**
* amdgpu_device_get_rev_id - query device rev_id
*
@@ -819,6 +1071,13 @@ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
return 0;
}
+static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
+ BUG();
+ return 0;
+}
+
/**
* amdgpu_invalid_wreg64 - dummy reg write function
*
@@ -836,6 +1095,13 @@ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint
BUG();
}
+static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
+{
+ DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
/**
* amdgpu_block_invalid_rreg - dummy reg read function
*
@@ -889,10 +1155,12 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
amdgpu_asic_pre_asic_init(adev);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
- adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
amdgpu_psp_wait_for_bootloader(adev);
ret = amdgpu_atomfirmware_asic_init(adev, true);
+ /* TODO: check the return val and stop device initialization if boot fails */
+ amdgpu_psp_query_boot_status(adev);
return ret;
} else {
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1245,14 +1513,45 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
}
/*
- * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
- * speed switching. Until we have confirmation from Intel that a specific host
- * supports it, it's safer that we keep it disabled for all.
+ * Check whether seamless boot is supported.
+ *
+ * So far we only support seamless boot on DCE 3.0 or later.
+ * If users report that it works on older ASICS as well, we may
+ * loosen this.
+ */
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
+{
+ switch (amdgpu_seamless) {
+ case -1:
+ break;
+ case 1:
+ return true;
+ case 0:
+ return false;
+ default:
+ DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
+ amdgpu_seamless);
+ return false;
+ }
+
+ if (!(adev->flags & AMD_IS_APU))
+ return false;
+
+ if (adev->mman.keep_stolen_vga_memory)
+ return false;
+
+ return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
+}
+
+/*
+ * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
+ * don't support dynamic speed switching. Until we have confirmation from Intel
+ * that a specific host supports it, it's safer that we keep it disabled for all.
*
* https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
* https://gitlab.freedesktop.org/drm/amd/-/issues/2663
*/
-bool amdgpu_device_pcie_dynamic_switching_supported(void)
+static bool amdgpu_device_pcie_dynamic_switching_supported(void)
{
#if IS_ENABLED(CONFIG_X86)
struct cpuinfo_x86 *c = &cpu_data(0);
@@ -1285,20 +1584,13 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
default:
return false;
}
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
+ return false;
return pcie_aspm_enabled(adev->pdev);
}
-bool amdgpu_device_aspm_support_quirk(void)
-{
-#if IS_ENABLED(CONFIG_X86)
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
-#else
- return true;
-#endif
-}
-
/* if we get transitioned to only one device, take VGA back */
/**
* amdgpu_device_vga_set_decode - enable/disable vga decode
@@ -1547,6 +1839,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
} else {
pr_info("switched off\n");
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ amdgpu_device_prepare(dev);
amdgpu_device_suspend(dev, true);
amdgpu_device_cache_pci_state(pdev);
/* Shut down the device */
@@ -2018,7 +2311,6 @@ out:
*/
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
- struct drm_device *dev = adev_to_drm(adev);
struct pci_dev *parent;
int i, r;
bool total;
@@ -2089,11 +2381,11 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
(amdgpu_is_atpx_hybrid() ||
amdgpu_has_atpx_dgpu_power_cntl()) &&
((adev->flags & AMD_IS_APU) == 0) &&
- !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
+ !dev_is_removable(&adev->pdev->dev))
adev->flags |= AMD_IS_PX;
if (!(adev->flags & AMD_IS_APU)) {
- parent = pci_upstream_bridge(adev->pdev);
+ parent = pcie_find_root_port(adev->pdev);
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
}
@@ -2103,6 +2395,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
+ if (!amdgpu_device_pcie_dynamic_switching_supported())
+ adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2290,6 +2584,18 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
ring->name);
return r;
}
+ r = amdgpu_uvd_entity_init(adev, ring);
+ if (r) {
+ DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_vce_entity_init(adev, ring);
+ if (r) {
+ DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
}
amdgpu_xcp_update_partition_sched_list(adev);
@@ -2735,7 +3041,7 @@ static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
{
int i, r;
- if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
return;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2988,8 +3294,10 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
if (adev->in_s0ix &&
- (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
- (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0)) &&
+ (adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
continue;
/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
@@ -3371,9 +3679,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
if (adev->asic_reset_res)
goto fail;
- if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
- adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
- adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
} else {
task_barrier_full(&hive->tb);
@@ -3485,8 +3791,8 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
adev->gfx.mcbp = true;
else if (amdgpu_mcbp == 0)
adev->gfx.mcbp = false;
- else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
+ else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
adev->gfx.num_gfx_rings)
adev->gfx.mcbp = true;
@@ -3551,6 +3857,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->pciep_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
+ adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
adev->didt_rreg = &amdgpu_invalid_rreg;
@@ -3606,6 +3914,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->ras_list);
+ INIT_LIST_HEAD(&adev->pm.od_kobj_list);
+
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
@@ -3702,7 +4012,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* internal path natively support atomics, set have_atomics_support to true.
*/
} else if ((adev->flags & AMD_IS_APU) &&
- (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >
+ IP_VERSION(9, 0, 0))) {
adev->have_atomics_support = true;
} else {
adev->have_atomics_support =
@@ -3750,13 +4061,23 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
} else {
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ r = psp_gpu_reset(adev);
+ break;
+ default:
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ break;
+ }
+
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
@@ -3842,22 +4163,6 @@ fence_driver_init:
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
- r = amdgpu_atombios_sysfs_init(adev);
- if (r)
- drm_err(&adev->ddev,
- "registering atombios sysfs failed (%d).\n", r);
-
- r = amdgpu_pm_sysfs_init(adev);
- if (r)
- DRM_ERROR("registering pm sysfs failed (%d).\n", r);
-
- r = amdgpu_ucode_sysfs_init(adev);
- if (r) {
- adev->ucode_sysfs_en = false;
- DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
- } else
- adev->ucode_sysfs_en = true;
-
/*
* Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
* Otherwise the mgpu fan boost feature will be skipped due to the
@@ -3886,10 +4191,36 @@ fence_driver_init:
flush_delayed_work(&adev->delayed_init_work);
}
+ /*
+ * Place those sysfs registering after `late_init`. As some of those
+ * operations performed in `late_init` might affect the sysfs
+ * interfaces creating.
+ */
+ r = amdgpu_atombios_sysfs_init(adev);
+ if (r)
+ drm_err(&adev->ddev,
+ "registering atombios sysfs failed (%d).\n", r);
+
+ r = amdgpu_pm_sysfs_init(adev);
+ if (r)
+ DRM_ERROR("registering pm sysfs failed (%d).\n", r);
+
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r) {
+ adev->ucode_sysfs_en = false;
+ DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
+
r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (r)
dev_err(adev->dev, "Could not create amdgpu device attr\n");
+ r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
+ if (r)
+ dev_err(adev->dev,
+ "Could not create amdgpu board attributes\n");
+
amdgpu_fru_sysfs_init(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
@@ -3910,7 +4241,7 @@ fence_driver_init:
px = amdgpu_device_supports_px(ddev);
- if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
apple_gmux_detect(NULL, NULL)))
vga_switcheroo_register_client(adev->pdev,
&amdgpu_switcheroo_ops, px);
@@ -4055,9 +4386,12 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
kfree(adev->bios);
adev->bios = NULL;
+ kfree(adev->fru_info);
+ adev->fru_info = NULL;
+
px = amdgpu_device_supports_px(adev_to_drm(adev));
- if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
apple_gmux_detect(NULL, NULL)))
vga_switcheroo_unregister_client(adev->pdev);
@@ -4114,6 +4448,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
* Suspend & resume.
*/
/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_prepare(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i, r;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ /* Evict the majority of BOs before starting suspend sequence */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
* amdgpu_device_suspend - initiate device suspend
*
* @dev: drm dev pointer
@@ -4133,11 +4502,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
adev->in_suspend = true;
- /* Evict the majority of BOs before grabbing the full access */
- r = amdgpu_device_evict_resources(adev);
- if (r)
- return r;
-
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_fini_data_exchange(adev);
r = amdgpu_virt_request_full_gpu(adev, false);
@@ -4219,19 +4583,18 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
}
amdgpu_fence_driver_hw_init(adev);
- r = amdgpu_device_ip_late_init(adev);
- if (r)
- goto exit;
-
- queue_delayed_work(system_wq, &adev->delayed_init_work,
- msecs_to_jiffies(AMDGPU_RESUME_MS));
-
if (!adev->in_s0ix) {
r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
if (r)
goto exit;
}
+ r = amdgpu_device_ip_late_init(adev);
+ if (r)
+ goto exit;
+
+ queue_delayed_work(system_wq, &adev->delayed_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
exit:
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_init_data_exchange(adev);
@@ -4799,67 +5162,16 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
lockdep_assert_held(&adev->reset_domain->sem);
- for (i = 0; i < adev->num_regs; i++) {
- adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
- trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
- adev->reset_dump_reg_value[i]);
- }
-
- return 0;
-}
-
-#ifdef CONFIG_DEV_COREDUMP
-static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
- size_t count, void *data, size_t datalen)
-{
- struct drm_printer p;
- struct amdgpu_device *adev = data;
- struct drm_print_iterator iter;
- int i;
+ for (i = 0; i < adev->reset_info.num_regs; i++) {
+ adev->reset_info.reset_dump_reg_value[i] =
+ RREG32(adev->reset_info.reset_dump_reg_list[i]);
- iter.data = buffer;
- iter.offset = 0;
- iter.start = offset;
- iter.remain = count;
-
- p = drm_coredump_printer(&iter);
-
- drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
- drm_printf(&p, "kernel: " UTS_RELEASE "\n");
- drm_printf(&p, "module: " KBUILD_MODNAME "\n");
- drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
- if (adev->reset_task_info.pid)
- drm_printf(&p, "process_name: %s PID: %d\n",
- adev->reset_task_info.process_name,
- adev->reset_task_info.pid);
-
- if (adev->reset_vram_lost)
- drm_printf(&p, "VRAM is lost due to GPU reset!\n");
- if (adev->num_regs) {
- drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
-
- for (i = 0; i < adev->num_regs; i++)
- drm_printf(&p, "0x%08x: 0x%08x\n",
- adev->reset_dump_reg_list[i],
- adev->reset_dump_reg_value[i]);
+ trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
+ adev->reset_info.reset_dump_reg_value[i]);
}
- return count - iter.remain;
-}
-
-static void amdgpu_devcoredump_free(void *data)
-{
-}
-
-static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
-
- ktime_get_ts64(&adev->reset_time);
- dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
- amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+ return 0;
}
-#endif
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context)
@@ -4908,7 +5220,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (r) {
dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
r, adev_to_drm(tmp_adev)->unique);
- break;
+ goto out;
}
}
@@ -4927,9 +5239,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (!r && amdgpu_ras_intr_triggered()) {
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
- tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
- tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
+ amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
}
amdgpu_ras_intr_cleared();
@@ -4961,15 +5271,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
goto out;
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
-#ifdef CONFIG_DEV_COREDUMP
- tmp_adev->reset_vram_lost = vram_lost;
- memset(&tmp_adev->reset_task_info, 0,
- sizeof(tmp_adev->reset_task_info));
- if (reset_context->job && reset_context->job->vm)
- tmp_adev->reset_task_info =
- reset_context->job->vm->task_info;
- amdgpu_reset_capture_coredumpm(tmp_adev);
-#endif
+
+ amdgpu_coredump(tmp_adev, vram_lost, reset_context);
+
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU reset!\n");
amdgpu_inc_vram_lost(tmp_adev);
@@ -4979,6 +5283,11 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (r)
return r;
+ r = amdgpu_xcp_restore_partition_mode(
+ tmp_adev->xcp_mgr);
+ if (r)
+ goto out;
+
r = amdgpu_device_ip_resume_phase2(tmp_adev);
if (r)
goto out;
@@ -5199,7 +5508,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* Flush RAM to disk so that after reboot
* the user can read log and see why the system rebooted.
*/
- if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
+ if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
+ amdgpu_ras_get_context(adev)->reboot) {
DRM_WARN("Emergency reboot.");
ksys_sync_helper();
@@ -5337,8 +5647,9 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
adev->asic_reset_res = r;
/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
amdgpu_ras_resume(adev);
} else {
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
@@ -5363,9 +5674,6 @@ skip_hw_reset:
drm_sched_start(&ring->sched, true);
}
- if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
- amdgpu_mes_self_test(tmp_adev);
-
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
@@ -6040,7 +6348,7 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
return true;
default:
/* IP discovery */
- if (!adev->ip_versions[DCE_HWIP][0] ||
+ if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
return false;
return true;