From ad51c46eec739c18be24178a30b47801b10e0357 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Thu, 21 Mar 2019 13:26:28 +0800 Subject: drm/amd/amdgpu: fix PCIe dpm feature issue (v3) use pcie_bandwidth_available to get real link state to update pcie table. v2: fix incorrect initialized return value v3: expand the fetching method about the link width to all asics. Signed-off-by: Chengming Gui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4f8fb4ecde34..ac0d646a7b74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3625,6 +3625,7 @@ static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, struct pci_dev *pdev = adev->pdev; enum pci_bus_speed cur_speed; enum pcie_link_width cur_width; + u32 ret = 1; *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; @@ -3632,6 +3633,10 @@ static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, while (pdev) { cur_speed = pcie_get_speed_cap(pdev); cur_width = pcie_get_width_cap(pdev); + ret = pcie_bandwidth_available(adev->pdev, NULL, + NULL, &cur_width); + if (!ret) + cur_width = PCIE_LNK_WIDTH_RESRV; if (cur_speed != PCI_SPEED_UNKNOWN) { if (*speed == PCI_SPEED_UNKNOWN) -- cgit From 1712fb1a2f6829150032ac76eb0e39b82a549cfb Mon Sep 17 00:00:00 2001 From: wentalou Date: Tue, 2 Apr 2019 17:13:05 +0800 Subject: drm/amdgpu: amdgpu_device_recover_vram always failed if only one node in shadow_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_bo_restore_shadow would assign zero to r if succeeded. r would remain zero if there is only one node in shadow_list. current code would always return failure when r <= 0. restart the timeout for each wait was a rather problematic bug as well. The value of tmo SHOULD be changed, otherwise we wait tmo jiffies on each loop. Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ac0d646a7b74..5d8b30fd4534 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3173,11 +3173,16 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) break; if (fence) { - r = dma_fence_wait_timeout(fence, false, tmo); + tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); fence = next; - if (r <= 0) + if (tmo == 0) { + r = -ETIMEDOUT; break; + } else if (tmo < 0) { + r = tmo; + break; + } } else { fence = next; } @@ -3188,8 +3193,8 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); - if (r <= 0 || tmo <= 0) { - DRM_ERROR("recover vram bo from shadow failed\n"); + if (r < 0 || tmo <= 0) { + DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); return -EIO; } -- cgit From b575f10dbd6f84c2c8744ff1f486bfae1e4f6f38 Mon Sep 17 00:00:00 2001 From: wentalou Date: Fri, 12 Apr 2019 15:01:14 +0800 Subject: drm/amdgpu: shadow in shadow_list without tbo.mem.start cause page fault in sriov TDR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shadow was added into shadow_list by amdgpu_bo_create_shadow. meanwhile, shadow->tbo.mem was not fully configured. tbo.mem would be fully configured by amdgpu_vm_sdma_map_table until calling amdgpu_vm_clear_bo. If sriov TDR occurred between amdgpu_bo_create_shadow and amdgpu_vm_sdma_map_table, amdgpu_device_recover_vram would deal with shadow without tbo.mem.start. Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5d8b30fd4534..79fb302fb954 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3165,6 +3165,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) /* No need to recover an evicted BO */ if (shadow->tbo.mem.mem_type != TTM_PL_TT || + shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) continue; -- cgit