summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c2
23 files changed, 87 insertions, 133 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9da14436a373..3d8a48f46b01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -254,8 +254,6 @@ extern int amdgpu_agp;
extern int amdgpu_wbrf;
-extern int fw_bo_location;
-
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
@@ -1146,6 +1144,7 @@ struct amdgpu_device {
bool debug_vm;
bool debug_largebar;
bool debug_disable_soft_recovery;
+ bool debug_use_vram_fw_buf;
};
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 067690ba7bff..77e263660288 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -138,6 +138,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
+static const struct drm_client_funcs kfd_client_funcs = {
+ .unregister = drm_client_release,
+};
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
@@ -161,7 +164,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.enable_mes = adev->enable_mes,
};
- ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL);
+ ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
if (ret) {
dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
return;
@@ -695,10 +698,8 @@ err:
void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
- /* Temporary workaround to fix issues observed in some
- * compute applications when GFXOFF is enabled on GFX11.
- */
- if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) {
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+ ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) {
pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
amdgpu_gfx_off_ctrl(adev, idle);
} else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index cf6ed5fce291..f262b9d89541 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -311,7 +311,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
- struct dma_fence **ef);
+ struct dma_fence __rcu **ef);
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
struct kfd_vm_fault_info *info);
int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d17b2452cb1f..f183d7faeeec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2802,7 +2802,7 @@ unlock_out:
put_task_struct(usertask);
}
-static void replace_eviction_fence(struct dma_fence **ef,
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
struct dma_fence *new_ef)
{
struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
@@ -2837,7 +2837,7 @@ static void replace_eviction_fence(struct dma_fence **ef,
* 7. Add fence to all PD and PT BOs.
* 8. Unreserve all BOs
*/
-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
{
struct amdkfd_process_info *process_info = info;
struct amdgpu_vm *peer_vm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5bb444bb36ce..b158d27d0a71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1544,6 +1544,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+ release_firmware(adev->pm.fw);
if (fw_ver < 0x00160e00)
return true;
}
@@ -5245,7 +5246,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset, skip_hw_reset, vram_lost = false;
int r = 0;
- bool gpu_reset_for_dev_remove = 0;
/* Try reset handler method first */
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
@@ -5265,10 +5265,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
- gpu_reset_for_dev_remove =
- test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
- test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
-
/*
* ASIC reset has to be done on all XGMI hive nodes ASAP
* to allow proper links negotiation in FW (within 1 sec)
@@ -5311,18 +5307,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
amdgpu_ras_intr_cleared();
}
- /* Since the mode1 reset affects base ip blocks, the
- * phase1 ip blocks need to be resumed. Otherwise there
- * will be a BIOS signature error and the psp bootloader
- * can't load kdb on the next amdgpu install.
- */
- if (gpu_reset_for_dev_remove) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list)
- amdgpu_device_ip_resume_phase1(tmp_adev);
-
- goto end;
- }
-
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
if (need_full_reset) {
/* post card */
@@ -5559,11 +5543,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
int i, r = 0;
bool need_emergency_restart = false;
bool audio_suspended = false;
- bool gpu_reset_for_dev_remove = false;
-
- gpu_reset_for_dev_remove =
- test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
- test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
/*
* Special case: RAS triggered and full reset isn't supported
@@ -5601,7 +5580,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
list_add_tail(&tmp_adev->reset_list, &device_list);
- if (gpu_reset_for_dev_remove && adev->shutdown)
+ if (adev->shutdown)
tmp_adev->shutdown = true;
}
if (!list_is_first(&adev->reset_list, &device_list))
@@ -5686,10 +5665,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (gpu_reset_for_dev_remove) {
- /* Workaroud for ASICs need to disable SMC first */
- amdgpu_device_smu_fini_early(tmp_adev);
- }
r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
/*TODO Should we stop ?*/
if (r) {
@@ -5721,9 +5696,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
if (r && r == -EAGAIN)
goto retry;
-
- if (!r && gpu_reset_for_dev_remove)
- goto recover_end;
}
skip_hw_reset:
@@ -5779,7 +5751,6 @@ skip_sched_resume:
amdgpu_ras_set_error_query_ready(tmp_adev, true);
}
-recover_end:
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 0431eafa86b5..c7d60dd0fb97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1963,8 +1963,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
break;
case IP_VERSION(9, 4, 3):
- if (!amdgpu_exp_hw_support)
- return -EINVAL;
amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
break;
case IP_VERSION(10, 1, 10):
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 852cec98ff26..cc69005f5b46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_VM = BIT(0),
AMDGPU_DEBUG_LARGEBAR = BIT(1),
AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+ AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -210,7 +211,6 @@ int amdgpu_seamless = -1; /* auto */
uint amdgpu_debug_mask;
int amdgpu_agp = -1; /* auto */
int amdgpu_wbrf = -1;
-int fw_bo_location = -1;
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@@ -990,10 +990,6 @@ MODULE_PARM_DESC(wbrf,
"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
module_param_named(wbrf, amdgpu_wbrf, int, 0444);
-MODULE_PARM_DESC(fw_bo_location,
- "location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram");
-module_param(fw_bo_location, int, 0644);
-
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
@@ -2122,6 +2118,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: soft reset for GPU recovery disabled\n");
adev->debug_disable_soft_recovery = true;
}
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+ pr_info("debug: place fw in vram for frontdoor loading\n");
+ adev->debug_use_vram_fw_buf = true;
+ }
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
@@ -2233,6 +2234,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, ddev);
+ amdgpu_init_debug_options(adev);
+
ret = amdgpu_driver_load_kms(adev, flags);
if (ret)
goto err_pci;
@@ -2313,8 +2316,6 @@ retry_init:
amdgpu_get_secondary_funcs(adev);
}
- amdgpu_init_debug_options(adev);
-
return 0;
err_pci:
@@ -2336,38 +2337,6 @@ amdgpu_pci_remove(struct pci_dev *pdev)
pm_runtime_forbid(dev->dev);
}
- if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
- !amdgpu_sriov_vf(adev)) {
- bool need_to_reset_gpu = false;
-
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- struct amdgpu_hive_info *hive;
-
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive->device_remove_count == 0)
- need_to_reset_gpu = true;
- hive->device_remove_count++;
- amdgpu_put_xgmi_hive(hive);
- } else {
- need_to_reset_gpu = true;
- }
-
- /* Workaround for ASICs need to reset SMU.
- * Called only when the first device is removed.
- */
- if (need_to_reset_gpu) {
- struct amdgpu_reset_context reset_context;
-
- adev->shutdown = true;
- memset(&reset_context, 0, sizeof(reset_context));
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags);
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
- }
- }
-
amdgpu_driver_unload_kms(dev);
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d2f273d77e59..55784a9f26c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -1045,21 +1045,28 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
* seconds, so here, we just pick up three parts for emulation.
*/
ret = memcmp(vram_ptr, cptr, 10);
- if (ret)
- return ret;
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
ret = memcmp(vram_ptr + (size / 2), cptr, 10);
- if (ret)
- return ret;
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
ret = memcmp(vram_ptr + size - 10, cptr, 10);
- if (ret)
- return ret;
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+release_buffer:
amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
&vram_ptr);
- return 0;
+ return ret;
}
static ssize_t current_memory_partition_show(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b5ebafd4a3ad..bf4f48fe438d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1105,7 +1105,12 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GPU_AVG_POWER,
(void *)&ui32, &ui32_size)) {
- return -EINVAL;
+ /* fall back to input power for backwards compat */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
}
ui32 >>= 8;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 2addbdf88394..0328616473f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
}
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&psp->fw_pri_bo,
&psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fc42fb6ee191..31823a30dea2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -305,11 +305,13 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return -EINVAL;
data->head.block = block_id;
- /* only ue and ce errors are supported */
+ /* only ue, ce and poison errors are supported */
if (!memcmp("ue", err, 2))
data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
else if (!memcmp("ce", err, 2))
data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ else if (!memcmp("poison", err, 6))
+ data->head.type = AMDGPU_RAS_ERROR__POISON;
else
return -EINVAL;
@@ -431,9 +433,10 @@ static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
* The block is one of: umc, sdma, gfx, etc.
* see ras_block_string[] for details
*
- * The error type is one of: ue, ce, where,
+ * The error type is one of: ue, ce and poison where,
* ue is multi-uncorrectable
* ce is single-correctable
+ * poison is poison
*
* The sub-block is a the sub-block index, pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers, leading 0x is optional.
@@ -1067,8 +1070,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
mcm_info = &err_info->mcm_info;
if (err_info->ce_count) {
dev_info(adev->dev, "socket: %d, die: %d, "
- "%lld new correctable hardware errors detected in %s block, "
- "no user action is needed\n",
+ "%lld new correctable hardware errors detected in %s block\n",
mcm_info->socket_id,
mcm_info->die_id,
err_info->ce_count,
@@ -1080,8 +1082,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
dev_info(adev->dev, "socket: %d, die: %d, "
- "%lld correctable hardware errors detected in total in %s block, "
- "no user action is needed\n",
+ "%lld correctable hardware errors detected in total in %s block\n",
mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name);
}
}
@@ -1108,16 +1109,14 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "
"%ld correctable hardware errors "
- "detected in %s block, no user "
- "action is needed.\n",
+ "detected in %s block\n",
adev->smuio.funcs->get_socket_id(adev),
adev->smuio.funcs->get_die_id(adev),
ras_mgr->err_data.ce_count,
blk_name);
} else {
dev_info(adev->dev, "%ld correctable hardware errors "
- "detected in %s block, no user "
- "action is needed.\n",
+ "detected in %s block\n",
ras_mgr->err_data.ce_count,
blk_name);
}
@@ -1920,7 +1919,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
struct amdgpu_iv_entry *entry)
{
dev_info(obj->adev->dev,
- "Poison is created, no user action is needed.\n");
+ "Poison is created\n");
}
static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
@@ -2920,6 +2919,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
amdgpu_ras_query_poison_mode(adev);
+ /* Packed socket_id to ras feature mask bits[31:29] */
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id)
+ con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29);
+
/* Get RAS schema for particular SOC */
con->schema = amdgpu_get_ras_schema(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index b0335a1c5e90..19899f6b9b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -32,7 +32,6 @@ enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
- AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
};
struct amdgpu_reset_context {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index d334e42fe0eb..3e12763e477a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
{
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
- (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index b6cd565562ad..4740dd65b99d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -116,7 +116,7 @@ struct amdgpu_mem_stats;
#define AMDGPU_VM_FAULT_STOP_FIRST 1
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
-/* Reserve 4MB VRAM for page tables */
+/* How much VRAM be reserved for page tables */
#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 6f149b54d4d3..b9a15d51eb5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -59,11 +59,8 @@ static inline uint16_t complete_integer_division_u16(
static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
{
- bool arg1_negative = numerator < 0;
- bool arg2_negative = denominator < 0;
-
- uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator);
- uint16_t arg2_value = (uint16_t)(arg2_negative ? -denominator : denominator);
+ u16 arg1_value = numerator;
+ u16 arg2_value = denominator;
uint16_t remainder;
@@ -100,9 +97,6 @@ static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
res_value += summand;
}
- if (arg1_negative ^ arg2_negative)
- res_value = -res_value;
-
return res_value;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6cab882e8061..1592c63b3099 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -43,7 +43,6 @@ struct amdgpu_hive_info {
} pstate;
struct amdgpu_reset_domain *reset_domain;
- uint32_t device_remove_count;
atomic_t ras_recovery;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
index f0737fb3a999..d1bba9c64e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -30,6 +30,8 @@
#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+#define regATHUB_MISC_CNTL_V3_3_0 0x00d8
+#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0
static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
@@ -40,6 +42,9 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 1):
data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
break;
+ case IP_VERSION(3, 3, 0):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0);
+ break;
default:
data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
break;
@@ -53,6 +58,9 @@ static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
case IP_VERSION(3, 0, 1):
WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
break;
+ case IP_VERSION(3, 3, 0):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data);
+ break;
default:
WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 73f6d7e72c73..d63cab294883 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3996,16 +3996,13 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
- /* don't check this. There are apparently firmwares in the wild with
- * incorrect size in the header
- */
- if (err == -ENODEV)
- goto out;
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
- dev_dbg(adev->dev,
- "gfx10: amdgpu_ucode_request() failed \"%s\"\n",
- fw_name);
+ goto out;
+
+ /* don't validate this firmware. There are apparently firmwares
+ * in the wild with incorrect size in the header
+ */
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 2fbcd9765980..0ea0866c261f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -115,7 +115,7 @@ static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188),
- SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007),
SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007),
SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000),
SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
@@ -6383,6 +6383,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
mask = 1;
counter = 0;
gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 95d06da544e2..49aecdcee006 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -456,10 +456,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
- tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
- WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
- WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 6d24c84924cb..19986ff6a48d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -401,8 +401,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
if (err_data.ce_count)
dev_info(adev->dev, "%ld correctable hardware "
- "errors detected in %s block, "
- "no user action is needed.\n",
+ "errors detected in %s block\n",
obj->err_data.ce_count,
get_ras_block_str(adev->nbio.ras_if));
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 25a3da83e0fb..e90f33780803 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -597,8 +597,7 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
if (err_data.ce_count)
dev_info(adev->dev, "%ld correctable hardware "
- "errors detected in %s block, "
- "no user action is needed.\n",
+ "errors detected in %s block\n",
obj->err_data.ce_count,
get_ras_block_str(adev->nbio.ras_if));
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 530549314ce4..a3ee3c4c650f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -64,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
uint64_t reg_value;
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
- dev_info(adev->dev, "Deferred error, no user action is needed.\n");
+ dev_info(adev->dev, "Deferred error\n");
if (mc_umc_status)
dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);