From 6d16dac85c081825af58111023428c43d1da7e1a Mon Sep 17 00:00:00 2001
From: Yong Zhao <Yong.Zhao@amd.com>
Date: Thu, 31 Aug 2017 15:55:00 -0400
Subject: drm/amdgpu: Set the correct value for PDEs/PTEs of ATC memory on
 Raven
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without the additional bits set in PDEs/PTEs, the ATC memory access
would have failed on Raven.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  9 ++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 10 ++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fee0a32ac56f..b500bb6a8491 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -328,9 +328,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				AMDGPU_GEM_CREATE_SHADOW);
 
 	if (vm->pte_support_ats) {
-		init_value = AMDGPU_PTE_SYSTEM;
+		init_value = AMDGPU_PTE_DEFAULT_ATC;
 		if (level != adev->vm_manager.num_level - 1)
 			init_value |= AMDGPU_PDE_PTE;
+
 	}
 
 	/* walk over the address space and allocate the page tables */
@@ -2017,7 +2018,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		list_del(&mapping->list);
 
 		if (vm->pte_support_ats)
-			init_pte_value = AMDGPU_PTE_SYSTEM;
+			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
 		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
 						mapping->start, mapping->last,
@@ -2629,7 +2630,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 		if (adev->asic_type == CHIP_RAVEN) {
 			vm->pte_support_ats = true;
-			init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
+			init_pde_value = AMDGPU_PTE_DEFAULT_ATC
+					| AMDGPU_PDE_PTE;
+
 		}
 	} else
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d68f39b4e5e7..aa914256b4bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -73,6 +73,16 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
 
+/* For Raven */
+#define AMDGPU_MTYPE_CC 2
+
+#define AMDGPU_PTE_DEFAULT_ATC  (AMDGPU_PTE_SYSTEM      \
+                                | AMDGPU_PTE_SNOOPED    \
+                                | AMDGPU_PTE_EXECUTABLE \
+                                | AMDGPU_PTE_READABLE   \
+                                | AMDGPU_PTE_WRITEABLE  \
+                                | AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC))
+
 /* How to programm VM fault handling */
 #define AMDGPU_VM_FAULT_STOP_NEVER	0
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
-- 
cgit 


From a05502e5cfa9abe17a16592be82c2f5692c91f35 Mon Sep 17 00:00:00 2001
From: Horace Chen <horace.chen@amd.com>
Date: Fri, 29 Sep 2017 14:41:57 +0800
Subject: drm/amdgpu: Reserve shared memory on VRAM for SR-IOV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SR-IOV need to reserve a piece of shared VRAM at the exact place
to exchange data betweem PF and VF. The start address and size of
the shared mem are passed to guest through VBIOS structure
VRAM_UsageByFirmware.

VRAM_UsageByFirmware is a general feature in VBIOS, it indicates
that VBIOS need to reserve a piece of memory on the VRAM.

Because the mem address is specified. Reserve it early in
amdgpu_ttm_init to make sure that it can monoplize the space.

Signed-off-by: Horace Chen <horace.chen@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h          | 14 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 18 ++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 76 ++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c      |  9 ++++
 4 files changed, 116 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a23b8af95319..5c4bed7778d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1378,6 +1378,18 @@ struct amdgpu_atcs {
 	struct amdgpu_atcs_functions functions;
 };
 
+/*
+ * Firmware VRAM reservation
+ */
+struct amdgpu_fw_vram_usage {
+	u64 start_offset;
+	u64 size;
+	struct amdgpu_bo *reserved_bo;
+	void *va;
+};
+
+int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev);
+
 /*
  * CGS
  */
@@ -1582,6 +1594,8 @@ struct amdgpu_device {
 	struct delayed_work     late_init_work;
 
 	struct amdgpu_virt	virt;
+	/* firmware VRAM reservation */
+	struct amdgpu_fw_vram_usage fw_vram_usage;
 
 	/* link all shadow bo */
 	struct list_head                shadow_list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index ce443586a0c7..f66d33e4baca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1807,6 +1807,8 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
 	uint16_t data_offset;
 	int usage_bytes = 0;
 	struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage;
+	u64 start_addr;
+	u64 size;
 
 	if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
 		firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
@@ -1815,7 +1817,21 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
 			  le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
 			  le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
 
-		usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
+		start_addr = firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware;
+		size = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb;
+
+		if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+			(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+			ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+			/* Firmware request VRAM reservation for SR-IOV */
+			adev->fw_vram_usage.start_offset = (start_addr &
+				(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+			adev->fw_vram_usage.size = size << 10;
+			/* Use the default scratch size */
+			usage_bytes = 0;
+		} else {
+			usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
+		}
 	}
 	ctx->scratch_size_bytes = 0;
 	if (usage_bytes == 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1949d8aedf49..7b3e3b5461c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -657,6 +657,81 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc)
 			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
 }
 
+/*
+ * Firmware Reservation functions
+ */
+/**
+ * amdgpu_fw_reserve_vram_fini - free fw reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free fw reserved vram if it has been reserved.
+ */
+void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
+		NULL, &adev->fw_vram_usage.va);
+}
+
+/**
+ * amdgpu_fw_reserve_vram_init - create bo vram reservation from fw
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from fw.
+ */
+int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev)
+{
+	int r = 0;
+	u64 gpu_addr;
+	u64 vram_size = adev->mc.visible_vram_size;
+
+	adev->fw_vram_usage.va = NULL;
+	adev->fw_vram_usage.reserved_bo = NULL;
+
+	if (adev->fw_vram_usage.size > 0 &&
+		adev->fw_vram_usage.size <= vram_size) {
+
+		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
+			PAGE_SIZE, true, 0,
+			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
+			&adev->fw_vram_usage.reserved_bo);
+		if (r)
+			goto error_create;
+
+		r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
+		if (r)
+			goto error_reserve;
+		r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
+			AMDGPU_GEM_DOMAIN_VRAM,
+			adev->fw_vram_usage.start_offset,
+			(adev->fw_vram_usage.start_offset +
+			adev->fw_vram_usage.size), &gpu_addr);
+		if (r)
+			goto error_pin;
+		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
+			&adev->fw_vram_usage.va);
+		if (r)
+			goto error_kmap;
+
+		amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
+	}
+	return r;
+
+error_kmap:
+	amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
+error_pin:
+	amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
+error_reserve:
+	amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
+error_create:
+	adev->fw_vram_usage.va = NULL;
+	adev->fw_vram_usage.reserved_bo = NULL;
+	return r;
+}
+
+
 /*
  * GPU helpers function.
  */
@@ -2300,6 +2375,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	/* evict vram memory */
 	amdgpu_bo_evict_vram(adev);
 	amdgpu_ib_pool_fini(adev);
+	amdgpu_fw_reserve_vram_fini(adev);
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 15a28578d458..1f68a146e26c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1255,6 +1255,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* Change the size here instead of the init above so only lpfn is affected */
 	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
+	/*
+	 *The reserved vram for firmware must be pinned to the specified
+	 *place on the VRAM, so reserve it early.
+	 */
+	r = amdgpu_fw_reserve_vram_init(adev);
+	if (r) {
+		return r;
+	}
+
 	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_VRAM,
 				    &adev->stolen_vga_memory,
-- 
cgit 


From 9fc8fc709b356c85034cbcb3b84c9d8b77865f52 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 18 Sep 2017 13:58:30 +0200
Subject: drm/amdgpu: add VM support for huge pages v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert GTT mappings into linear ones for huge page handling.

v2: use fragment size as minimum for linear conversion

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b500bb6a8491..eb4a01c14eee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1699,6 +1699,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      struct drm_mm_node *nodes,
 				      struct dma_fence **fence)
 {
+	unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
 	uint64_t pfn, start = mapping->start;
 	int r;
 
@@ -1733,6 +1734,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	}
 
 	do {
+		dma_addr_t *dma_addr = NULL;
 		uint64_t max_entries;
 		uint64_t addr, last;
 
@@ -1746,15 +1748,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 
 		if (pages_addr) {
+			uint64_t count;
+
 			max_entries = min(max_entries, 16ull * 1024ull);
-			addr = 0;
+			for (count = 1; count < max_entries; ++count) {
+				uint64_t idx = pfn + count;
+
+				if (pages_addr[idx] !=
+				    (pages_addr[idx - 1] + PAGE_SIZE))
+					break;
+			}
+
+			if (count < min_linear_pages) {
+				addr = pfn << PAGE_SHIFT;
+				dma_addr = pages_addr;
+			} else {
+				addr = pages_addr[pfn];
+				max_entries = count;
+			}
+
 		} else if (flags & AMDGPU_PTE_VALID) {
 			addr += adev->vm_manager.vram_base_offset;
+			addr += pfn << PAGE_SHIFT;
 		}
-		addr += pfn << PAGE_SHIFT;
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
-- 
cgit 


From b82485fd384a56c27fae44e649552eca6334237a Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Fri, 15 Sep 2017 21:05:19 -0400
Subject: drm/amdgpu: add helper to convert a ttm bo to amdgpu_bo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 9 +++++----
 3 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6982baeccd14..8b4ed8a98a18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -40,9 +40,7 @@
 static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
-	struct amdgpu_bo *bo;
-
-	bo = container_of(tbo, struct amdgpu_bo, tbo);
+	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 
 	amdgpu_bo_kunmap(bo);
 
@@ -884,7 +882,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return;
 
-	abo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = ttm_to_amdgpu_bo(bo);
 	amdgpu_vm_bo_invalidate(adev, abo, evict);
 
 	amdgpu_bo_kunmap(abo);
@@ -911,7 +909,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return 0;
 
-	abo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = ttm_to_amdgpu_bo(bo);
 
 	/* Remember that this BO was accessed by the CPU */
 	abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 39b6bf6fb051..c26ef53604af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -94,6 +94,11 @@ struct amdgpu_bo {
 	};
 };
 
+static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
+{
+	return container_of(tbo, struct amdgpu_bo, tbo);
+}
+
 /**
  * amdgpu_mem_type_to_domain - return domain corresponding to mem_type
  * @mem_type:	ttm memory type
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1f68a146e26c..10952c3e5eb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -44,6 +44,7 @@
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
 #include "amdgpu.h"
+#include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
@@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		placement->num_busy_placement = 1;
 		return;
 	}
-	abo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = ttm_to_amdgpu_bo(bo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (adev->mman.buffer_funcs &&
@@ -257,7 +258,7 @@ gtt:
 
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 		return -EPERM;
@@ -484,7 +485,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 	int r;
 
 	/* Can't move a pinned BO */
-	abo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = ttm_to_amdgpu_bo(bo);
 	if (WARN_ON_ONCE(abo->pin_count > 0))
 		return -EINVAL;
 
@@ -1142,7 +1143,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 				    unsigned long offset,
 				    void *buf, int len, int write)
 {
-	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
 	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
 	uint32_t value = 0;
-- 
cgit 


From 177ae09b5d699a5ebd1cafcee78889db968abf54 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Fri, 15 Sep 2017 20:44:06 -0400
Subject: drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a flag to signal that access to a BO will be synchronized
through an external mechanism.

Currently all buffers shared between contexts are subject to implicit
synchronization. However, this is only required for protocols that
currently don't support an explicit synchronization mechanism (DRI2/3).

This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that
users can specify when it is safe to disable implicit sync.

v2: only disable explicit sync in amdgpu_cs_ioctl

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c   | 7 +++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 8 ++++----
 7 files changed, 27 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ab83dfcabb41..38027a00f8ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -705,7 +705,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 
 	list_for_each_entry(e, &p->validated, tv.head) {
 		struct reservation_object *resv = e->robj->tbo.resv;
-		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);
+		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
+				     amdgpu_bo_explicit_sync(e->robj));
 
 		if (r)
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index b0d45c8e6bb3..21e99366cab3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
-		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID))
+		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
+		      AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
+
 		return -EINVAL;
 
 	/* reject invalid gem domains */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index c26ef53604af..428aae048f4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
 	}
 }
 
+/**
+ * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
+ */
+static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
+{
+	return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+}
+
 int amdgpu_bo_create(struct amdgpu_device *adev,
 			    unsigned long size, int byte_align,
 			    bool kernel, u32 domain, u64 flags,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index c586f44312f9..a4bf21f8f1c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
  *
  * @sync: sync object to add fences from reservation object to
  * @resv: reservation object with embedded fence
- * @shared: true if we should only sync to the exclusive fence
+ * @explicit_sync: true if we should only sync to the exclusive fence
  *
  * Sync to the fence
  */
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
-		     void *owner)
+		     void *owner, bool explicit_sync)
 {
 	struct reservation_object_list *flist;
 	struct dma_fence *f;
@@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 	f = reservation_object_get_excl(resv);
 	r = amdgpu_sync_fence(adev, sync, f);
 
+	if (explicit_sync)
+		return r;
+
 	flist = reservation_object_get_list(resv);
 	if (!flist || r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index dc7687993317..70d7e3a279a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
-		     void *owner);
+		     void *owner,
+		     bool explicit_sync);
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 10952c3e5eb6..a2282bacf960 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1489,7 +1489,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	job->vm_needs_flush = vm_needs_flush;
 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     false);
 		if (r) {
 			DRM_ERROR("sync failed (%d).\n", r);
 			goto error_free;
@@ -1581,7 +1582,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 
 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
 		if (r) {
 			DRM_ERROR("sync failed (%d).\n", r);
 			goto error_free;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index eb4a01c14eee..c559d76ff695 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1035,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	int r;
 
 	amdgpu_sync_create(&sync);
-	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
+	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
 	r = amdgpu_sync_wait(&sync, true);
 	amdgpu_sync_free(&sync);
 
@@ -1176,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			amdgpu_ring_pad_ib(ring, params.ib);
 			amdgpu_sync_resv(adev, &job->sync,
 					 parent->base.bo->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM);
+					 AMDGPU_FENCE_OWNER_VM, false);
 			if (shadow)
 				amdgpu_sync_resv(adev, &job->sync,
 						 shadow->tbo.resv,
-						 AMDGPU_FENCE_OWNER_VM);
+						 AMDGPU_FENCE_OWNER_VM, false);
 
 			WARN_ON(params.ib->length_dw > ndw);
 			r = amdgpu_job_submit(job, ring, &vm->entity,
@@ -1644,7 +1644,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_free;
 
 	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
-			     owner);
+			     owner, false);
 	if (r)
 		goto error_free;
 
-- 
cgit 


From c2636dc53abd8269a0930bccd564f2f195dba729 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Thu, 22 Dec 2016 17:06:50 -0500
Subject: drm/amdgpu: add parameter to allocate high priority contexts v11
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new context creation parameter to express a global context priority.

The priority ranking in descending order is as follows:
 * AMDGPU_CTX_PRIORITY_HIGH_HW
 * AMDGPU_CTX_PRIORITY_HIGH_SW
 * AMDGPU_CTX_PRIORITY_NORMAL
 * AMDGPU_CTX_PRIORITY_LOW_SW
 * AMDGPU_CTX_PRIORITY_LOW_HW

The driver will attempt to schedule work to the hardware according to
the priorities. No latency or throughput guarantees are provided by
this patch.

This interface intends to service the EGL_IMG_context_priority
extension, and vulkan equivalents.

Setting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER.

v2: Instead of using flags, repurpose __pad
v3: Swap enum values of _NORMAL _HIGH for backwards compatibility
v4: Validate usermode priority and store it
v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword
v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN
v7: remove ctx->priority
v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE
v9: change the priority parameter to __s32
v10: split priorities into _SW and _HW
v11: Allow DRM_MASTER without CAP_SYS_NICE

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 61 ++++++++++++++++++++++++++++++---
 1 file changed, 57 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 75c933b1a432..52388b1b52c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -23,13 +23,40 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_auth.h>
 #include "amdgpu.h"
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_priority_permit(struct drm_file *filp,
+				      enum amd_sched_priority priority)
+{
+	/* NORMAL and below are accessible by everyone */
+	if (priority <= AMD_SCHED_PRIORITY_NORMAL)
+		return 0;
+
+	if (capable(CAP_SYS_NICE))
+		return 0;
+
+	if (drm_is_current_master(filp))
+		return 0;
+
+	return -EACCES;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum amd_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
 {
 	unsigned i, j;
 	int r;
 
+	if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->adev = adev;
 	kref_init(&ctx->refcount);
@@ -51,7 +78,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 		struct amdgpu_ring *ring = adev->rings[i];
 		struct amd_sched_rq *rq;
 
-		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+		rq = &ring->sched.sched_rq[priority];
 
 		if (ring == &adev->gfx.kiq.ring)
 			continue;
@@ -100,6 +127,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
+			    struct drm_file *filp,
+			    enum amd_sched_priority priority,
 			    uint32_t *id)
 {
 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
@@ -117,8 +146,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 		kfree(ctx);
 		return r;
 	}
+
 	*id = (uint32_t)r;
-	r = amdgpu_ctx_init(adev, ctx);
+	r = amdgpu_ctx_init(adev, priority, filp, ctx);
 	if (r) {
 		idr_remove(&mgr->ctx_handles, *id);
 		*id = 0;
@@ -188,11 +218,30 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
 	return 0;
 }
 
+static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
+{
+	switch (amdgpu_priority) {
+	case AMDGPU_CTX_PRIORITY_HIGH_HW:
+		return AMD_SCHED_PRIORITY_HIGH_HW;
+	case AMDGPU_CTX_PRIORITY_HIGH_SW:
+		return AMD_SCHED_PRIORITY_HIGH_SW;
+	case AMDGPU_CTX_PRIORITY_NORMAL:
+		return AMD_SCHED_PRIORITY_NORMAL;
+	case AMDGPU_CTX_PRIORITY_LOW_SW:
+	case AMDGPU_CTX_PRIORITY_LOW_HW:
+		return AMD_SCHED_PRIORITY_LOW;
+	default:
+		WARN(1, "Invalid context priority %d\n", amdgpu_priority);
+		return AMD_SCHED_PRIORITY_NORMAL;
+	}
+}
+
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp)
 {
 	int r;
 	uint32_t id;
+	enum amd_sched_priority priority;
 
 	union drm_amdgpu_ctx *args = data;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -200,10 +249,14 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 
 	r = 0;
 	id = args->in.ctx_id;
+	priority = amdgpu_to_sched_priority(args->in.priority);
+
+	if (priority >= AMD_SCHED_PRIORITY_MAX)
+		return -EINVAL;
 
 	switch (args->in.op) {
 	case AMDGPU_CTX_OP_ALLOC_CTX:
-		r = amdgpu_ctx_alloc(adev, fpriv, &id);
+		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
 		args->out.alloc.ctx_id = id;
 		break;
 	case AMDGPU_CTX_OP_FREE_CTX:
-- 
cgit 


From b2ff0e8ac4ce1fb647ae40feb4cf26bc9301e0c9 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Mon, 20 Feb 2017 17:53:19 -0500
Subject: drm/amdgpu: add framework for HW specific priority settings v9
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an initial framework for changing the HW priorities of rings. The
framework allows requesting priority changes for the lifetime of an
amdgpu_job. After the job completes the priority will decay to the next
lowest priority for which a request is still valid.

A new ring function set_priority() can now be populated to take care of
the HW specific programming sequence for priority changes.

v2: set priority before emitting IB, and take a ref on amdgpu_job
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb
v5: use atomic for tracking job priorities instead of last_job
v6: rename amdgpu_ring_priority_[get/put]() and align parameters
v7: replace spinlocks with mutexes for KIQ compatibility
v8: raise ring priority during cs_ioctl, instead of job_run
v9: priority_get() before push_job()

Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 +++++++
 4 files changed, 96 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 38027a00f8ab..fe7dd44ac9fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1177,6 +1177,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->uf_sequence = seq;
 
 	amdgpu_job_free_resources(job);
+	amdgpu_ring_priority_get(job->ring,
+				 amd_sched_get_job_priority(&job->base));
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4510627ae83e..83d13431cbdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -103,6 +103,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
 {
 	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
+	amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job));
 	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	amdgpu_sync_free(&job->dep_sync);
@@ -139,6 +140,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 	job->fence_ctx = entity->fence_context;
 	*f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
+	amdgpu_ring_priority_get(job->ring,
+				 amd_sched_get_job_priority(&job->base));
 	amd_sched_entity_push_job(&job->base);
 
 	return 0;
@@ -203,6 +206,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	/* if gpu reset, hw fence will be replaced here */
 	dma_fence_put(job->fence);
 	job->fence = dma_fence_get(fence);
+
 	amdgpu_job_free_resources(job);
 	return fence;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 019932a7ea3a..e5ece1fae149 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -154,6 +154,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
 		ring->funcs->end_use(ring);
 }
 
+/**
+ * amdgpu_ring_priority_put - restore a ring's priority
+ *
+ * @ring: amdgpu_ring structure holding the information
+ * @priority: target priority
+ *
+ * Release a request for executing at @priority
+ */
+void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority)
+{
+	int i;
+
+	if (!ring->funcs->set_priority)
+		return;
+
+	if (atomic_dec_return(&ring->num_jobs[priority]) > 0)
+		return;
+
+	/* no need to restore if the job is already at the lowest priority */
+	if (priority == AMD_SCHED_PRIORITY_NORMAL)
+		return;
+
+	mutex_lock(&ring->priority_mutex);
+	/* something higher prio is executing, no need to decay */
+	if (ring->priority > priority)
+		goto out_unlock;
+
+	/* decay priority to the next level with a job available */
+	for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) {
+		if (i == AMD_SCHED_PRIORITY_NORMAL
+				|| atomic_read(&ring->num_jobs[i])) {
+			ring->priority = i;
+			ring->funcs->set_priority(ring, i);
+			break;
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&ring->priority_mutex);
+}
+
+/**
+ * amdgpu_ring_priority_get - change the ring's priority
+ *
+ * @ring: amdgpu_ring structure holding the information
+ * @priority: target priority
+ *
+ * Request a ring's priority to be raised to @priority (refcounted).
+ */
+void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority)
+{
+	if (!ring->funcs->set_priority)
+		return;
+
+	atomic_inc(&ring->num_jobs[priority]);
+
+	mutex_lock(&ring->priority_mutex);
+	if (priority <= ring->priority)
+		goto out_unlock;
+
+	ring->priority = priority;
+	ring->funcs->set_priority(ring, priority);
+
+out_unlock:
+	mutex_unlock(&ring->priority_mutex);
+}
+
 /**
  * amdgpu_ring_init - init driver ring struct.
  *
@@ -169,7 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned max_dw, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type)
 {
-	int r;
+	int r, i;
 	int sched_hw_submission = amdgpu_sched_hw_submission;
 
 	/* Set the hw submission limit higher for KIQ because
@@ -247,9 +316,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	}
 
 	ring->max_dw = max_dw;
+	ring->priority = AMD_SCHED_PRIORITY_NORMAL;
+	mutex_init(&ring->priority_mutex);
 	INIT_LIST_HEAD(&ring->lru_list);
 	amdgpu_ring_lru_touch(adev, ring);
 
+	for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i)
+		atomic_set(&ring->num_jobs[i], 0);
+
 	if (amdgpu_debugfs_ring_init(adev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 491bd5512dcc..0d9ce141404c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -24,6 +24,7 @@
 #ifndef __AMDGPU_RING_H__
 #define __AMDGPU_RING_H__
 
+#include <drm/amdgpu_drm.h>
 #include "gpu_scheduler.h"
 
 /* max number of rings */
@@ -56,6 +57,7 @@ struct amdgpu_device;
 struct amdgpu_ring;
 struct amdgpu_ib;
 struct amdgpu_cs_parser;
+struct amdgpu_job;
 
 /*
  * Fences.
@@ -147,6 +149,9 @@ struct amdgpu_ring_funcs {
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+	/* priority functions */
+	void (*set_priority) (struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority);
 };
 
 struct amdgpu_ring {
@@ -187,6 +192,12 @@ struct amdgpu_ring {
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
 	bool			has_compute_vm_bug;
+
+	atomic_t		num_jobs[AMD_SCHED_PRIORITY_MAX];
+	struct mutex		priority_mutex;
+	/* protected by priority_mutex */
+	int			priority;
+
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
@@ -197,6 +208,10 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
+void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority);
+void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
+			      enum amd_sched_priority priority);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
-- 
cgit 


From b8866c26ec072f1924f5cf601ebea33ca9823326 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Fri, 28 Apr 2017 20:05:51 -0400
Subject: drm/amdgpu: implement ring set_priority for gfx_v8 compute v9
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.

Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
        0x2: CS_H
        0x1: CS_M
        0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.

In order to prevent getting stuck in loops of resources bouncing between
GFX and high priority compute and introducing further latency, we
statically reserve a portion of the pipe.

v2: fix srbm_select to ring->queue and use ring->funcs->type
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: switch int to enum amd_sched_priority
v5: corresponding changes for srbm_lock
v6: change CU reservation to PIPE_PERCENT allocation
v7: use kiq instead of MMIO
v8: back to MMIO, and make the implementation sleep safe.
v9: corresponding changes for splitting HIGH into _HW/_SW

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 99 ++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5c4bed7778d9..715ce4863bc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1035,6 +1035,10 @@ struct amdgpu_gfx {
 	bool                            in_suspend;
 	/* NGG */
 	struct amdgpu_ngg		ngg;
+
+	/* pipe reservation */
+	struct mutex			pipe_reserve_mutex;
+	DECLARE_BITMAP			(pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7b3e3b5461c3..412ad99d8871 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2094,6 +2094,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->vm_manager.vm_pte_num_rings = 0;
 	adev->gart.gart_funcs = NULL;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
+	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
 	adev->smc_rreg = &amdgpu_invalid_rreg;
 	adev->smc_wreg = &amdgpu_invalid_wreg;
@@ -2122,6 +2123,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->pm.mutex);
 	mutex_init(&adev->gfx.gpu_clock_mutex);
 	mutex_init(&adev->srbm_mutex);
+	mutex_init(&adev->gfx.pipe_reserve_mutex);
 	mutex_init(&adev->grbm_idx_mutex);
 	mutex_init(&adev->mn_lock);
 	mutex_init(&adev->virt.vf_errors.lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 147e92b3a959..51896b7353b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6394,6 +6394,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 }
 
+static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
+					   bool acquire)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int pipe_num, tmp, reg;
+	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
+
+	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
+
+	/* first me only has 2 entries, GFX and HP3D */
+	if (ring->me > 0)
+		pipe_num -= 2;
+
+	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
+	tmp = RREG32(reg);
+	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
+	WREG32(reg, tmp);
+}
+
+static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
+					    struct amdgpu_ring *ring,
+					    bool acquire)
+{
+	int i, pipe;
+	bool reserve;
+	struct amdgpu_ring *iring;
+
+	mutex_lock(&adev->gfx.pipe_reserve_mutex);
+	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+	if (acquire)
+		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+	else
+		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+
+	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
+		/* Clear all reservations - everyone reacquires all resources */
+		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
+			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
+						       true);
+
+		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
+						       true);
+	} else {
+		/* Lower all pipes without a current reservation */
+		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+			iring = &adev->gfx.gfx_ring[i];
+			pipe = amdgpu_gfx_queue_to_bit(adev,
+						       iring->me,
+						       iring->pipe,
+						       0);
+			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
+		}
+
+		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+			iring = &adev->gfx.compute_ring[i];
+			pipe = amdgpu_gfx_queue_to_bit(adev,
+						       iring->me,
+						       iring->pipe,
+						       0);
+			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
+		}
+	}
+
+	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
+}
+
+static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
+				      struct amdgpu_ring *ring,
+				      bool acquire)
+{
+	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
+	uint32_t queue_priority = acquire ? 0xf : 0x0;
+
+	mutex_lock(&adev->srbm_mutex);
+	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+					       enum amd_sched_priority priority)
+{
+	struct amdgpu_device *adev = ring->adev;
+	bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
+
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return;
+
+	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
+	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
+}
+
 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
 					     u64 addr, u64 seq,
 					     unsigned flags)
@@ -6839,6 +6937,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.set_priority = gfx_v8_0_ring_set_priority_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
-- 
cgit 


From b6d8a439345e71e9b1939c4d6997e09b5be9b5e1 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Wed, 24 May 2017 17:00:10 -0400
Subject: drm/amdgpu: make amdgpu_to_sched_priority detect invalid parameters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Returning invalid priorities as _NORMAL is a backwards compatibility
quirk of amdgpu_ctx_ioctl(). Move this detail one layer up where it
belongs.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 52388b1b52c2..d3d63f78bec9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -232,7 +232,7 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
 		return AMD_SCHED_PRIORITY_LOW;
 	default:
 		WARN(1, "Invalid context priority %d\n", amdgpu_priority);
-		return AMD_SCHED_PRIORITY_NORMAL;
+		return AMD_SCHED_PRIORITY_INVALID;
 	}
 }
 
@@ -251,8 +251,10 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 	id = args->in.ctx_id;
 	priority = amdgpu_to_sched_priority(args->in.priority);
 
-	if (priority >= AMD_SCHED_PRIORITY_MAX)
-		return -EINVAL;
+	/* For backwards compatibility reasons, we need to accept
+	 * ioctls with garbage in the priority field */
+	if (priority == AMD_SCHED_PRIORITY_INVALID)
+		priority = AMD_SCHED_PRIORITY_NORMAL;
 
 	switch (args->in.op) {
 	case AMDGPU_CTX_OP_ALLOC_CTX:
-- 
cgit 


From f3d19bf80d6c7bfe5922c09604a402ef176da41f Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Mon, 26 Jun 2017 16:12:10 -0400
Subject: drm/amdgpu: introduce AMDGPU_CTX_PRIORITY_UNSET
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use _INVALID to identify bad parameters and _UNSET to represent the
lack of interest in a specific value.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d3d63f78bec9..29eebdc30a4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -230,6 +230,8 @@ static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
 	case AMDGPU_CTX_PRIORITY_LOW_SW:
 	case AMDGPU_CTX_PRIORITY_LOW_HW:
 		return AMD_SCHED_PRIORITY_LOW;
+	case AMDGPU_CTX_PRIORITY_UNSET:
+		return AMD_SCHED_PRIORITY_UNSET;
 	default:
 		WARN(1, "Invalid context priority %d\n", amdgpu_priority);
 		return AMD_SCHED_PRIORITY_INVALID;
-- 
cgit 


From c23be4ae1d50e8d93f805de7ae3e8ea49a4ad781 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Tue, 6 Jun 2017 20:20:38 -0400
Subject: drm/amdgpu: add plumbing for ctx priority changes v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce amdgpu_ctx_priority_override(). A mechanism to override a
context's priority.

An override can be terminated by setting the override to
AMD_SCHED_PRIORITY_UNSET.

v2: change refcounted interface for a direct set

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  6 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 29 +++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 715ce4863bc8..951c8db01412 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -735,7 +735,9 @@ struct amdgpu_ctx {
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
-	bool preamble_presented;
+	bool 			preamble_presented;
+	enum amd_sched_priority init_priority;
+	enum amd_sched_priority override_priority;
 };
 
 struct amdgpu_ctx_mgr {
@@ -752,6 +754,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 			      struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				   struct amdgpu_ring *ring, uint64_t seq);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+				  enum amd_sched_priority priority);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 29eebdc30a4c..d2ef24f4b56d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -72,6 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	}
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = AMD_SCHED_PRIORITY_UNSET;
 
 	/* create context entity for each ring */
 	for (i = 0; i < adev->num_rings; i++) {
@@ -362,6 +364,33 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 	return fence;
 }
 
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+				  enum amd_sched_priority priority)
+{
+	int i;
+	struct amdgpu_device *adev = ctx->adev;
+	struct amd_sched_rq *rq;
+	struct amd_sched_entity *entity;
+	struct amdgpu_ring *ring;
+	enum amd_sched_priority ctx_prio;
+
+	ctx->override_priority = priority;
+
+	ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ?
+			ctx->init_priority : ctx->override_priority;
+
+	for (i = 0; i < adev->num_rings; i++) {
+		ring = adev->rings[i];
+		entity = &ctx->rings[i].entity;
+		rq = &ring->sched.sched_rq[ctx_prio];
+
+		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+			continue;
+
+		amd_sched_entity_set_rq(entity, rq);
+	}
+}
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 {
 	mutex_init(&mgr->lock);
-- 
cgit 


From 52c6a62c64fac03a434cdacf6ef671c6a9e9000f Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Mon, 26 Jun 2017 16:17:13 -0400
Subject: drm/amdgpu: add interface for editing a foreign process's priority v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE ioctls are used to set
the priority of a different process in the current system.

When a request is dropped, the process's contexts will be
restored to the priority specified at context creation time.

A request can be dropped by setting the override priority to
AMDGPU_CTX_PRIORITY_UNSET.

An fd is used to identify the remote process. This is simpler than
passing a pid number, which is vulnerable to re-use, etc.

This functionality is limited to DRM_MASTER since abuse of this
interface can have a negative impact on the system's performance.

v2: removed unused output structure
v3: change refcounted interface for a regular set operation

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile       |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   |  21 +-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 109 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h |  34 ++++++++++
 5 files changed, 147 insertions(+), 21 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 25a95c95df14..ef9a3b6d7b62 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_queue_mgr.o amdgpu_vf_error.o
+	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d2ef24f4b56d..a78b03f65c69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -25,6 +25,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_auth.h>
 #include "amdgpu.h"
+#include "amdgpu_sched.h"
 
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum amd_sched_priority priority)
@@ -220,26 +221,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
 	return 0;
 }
 
-static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
-{
-	switch (amdgpu_priority) {
-	case AMDGPU_CTX_PRIORITY_HIGH_HW:
-		return AMD_SCHED_PRIORITY_HIGH_HW;
-	case AMDGPU_CTX_PRIORITY_HIGH_SW:
-		return AMD_SCHED_PRIORITY_HIGH_SW;
-	case AMDGPU_CTX_PRIORITY_NORMAL:
-		return AMD_SCHED_PRIORITY_NORMAL;
-	case AMDGPU_CTX_PRIORITY_LOW_SW:
-	case AMDGPU_CTX_PRIORITY_LOW_HW:
-		return AMD_SCHED_PRIORITY_LOW;
-	case AMDGPU_CTX_PRIORITY_UNSET:
-		return AMD_SCHED_PRIORITY_UNSET;
-	default:
-		WARN(1, "Invalid context priority %d\n", amdgpu_priority);
-		return AMD_SCHED_PRIORITY_INVALID;
-	}
-}
-
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 51841259e23f..82e8d43b235a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -28,6 +28,7 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include <drm/amdgpu_drm.h>
+#include "amdgpu_sched.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 
@@ -1023,6 +1024,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	/* KMS */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
new file mode 100644
index 000000000000..cd123306eda7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez <andresx7@gmail.com>
+ */
+
+#include <linux/fdtable.h>
+#include <linux/pid.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+
+#include "amdgpu_vm.h"
+
+enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
+{
+	switch (amdgpu_priority) {
+	case AMDGPU_CTX_PRIORITY_HIGH_HW:
+		return AMD_SCHED_PRIORITY_HIGH_HW;
+	case AMDGPU_CTX_PRIORITY_HIGH_SW:
+		return AMD_SCHED_PRIORITY_HIGH_SW;
+	case AMDGPU_CTX_PRIORITY_NORMAL:
+		return AMD_SCHED_PRIORITY_NORMAL;
+	case AMDGPU_CTX_PRIORITY_LOW_SW:
+	case AMDGPU_CTX_PRIORITY_LOW_HW:
+		return AMD_SCHED_PRIORITY_LOW;
+	case AMDGPU_CTX_PRIORITY_UNSET:
+		return AMD_SCHED_PRIORITY_UNSET;
+	default:
+		WARN(1, "Invalid context priority %d\n", amdgpu_priority);
+		return AMD_SCHED_PRIORITY_INVALID;
+	}
+}
+
+static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
+						  int fd,
+						  enum amd_sched_priority priority)
+{
+	struct file *filp = fcheck(fd);
+	struct drm_file *file;
+	struct pid *pid;
+	struct amdgpu_fpriv *fpriv;
+	struct amdgpu_ctx *ctx;
+	uint32_t id;
+
+	if (!filp)
+		return -EINVAL;
+
+	pid = get_pid(((struct drm_file *)filp->private_data)->pid);
+
+	mutex_lock(&adev->ddev->filelist_mutex);
+	list_for_each_entry(file, &adev->ddev->filelist, lhead) {
+		if (file->pid != pid)
+			continue;
+
+		fpriv = file->driver_priv;
+		idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
+				amdgpu_ctx_priority_override(ctx, priority);
+	}
+	mutex_unlock(&adev->ddev->filelist_mutex);
+
+	put_pid(pid);
+
+	return 0;
+}
+
+int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *filp)
+{
+	union drm_amdgpu_sched *args = data;
+	struct amdgpu_device *adev = dev->dev_private;
+	enum amd_sched_priority priority;
+	int r;
+
+	priority = amdgpu_to_sched_priority(args->in.priority);
+	if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID)
+		return -EINVAL;
+
+	switch (args->in.op) {
+	case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
+		r = amdgpu_sched_process_priority_override(adev,
+							   args->in.fd,
+							   priority);
+		break;
+	default:
+		DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
new file mode 100644
index 000000000000..b28c067d3822
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez <andresx7@gmail.com>
+ */
+
+#ifndef __AMDGPU_SCHED_H__
+#define __AMDGPU_SCHED_H__
+
+#include <drm/drmP.h>
+
+enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority);
+int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *filp);
+
+#endif // __AMDGPU_SCHED_H__
-- 
cgit 


From 27b94b4f1386c3a8181f5a0277434a32e24e7dd7 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 1 Sep 2017 09:22:56 +0200
Subject: drm/amdgpu: fix placement flags in amdgpu_ttm_bind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise we lose the NO_EVICT flag and can try to evict pinned BOs.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7ef6c28a34d9..bc746131987f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -834,7 +834,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
 	placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
-	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
+	placements.flags = bo->mem.placement | TTM_PL_FLAG_TT;
 
 	r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
 	if (unlikely(r))
-- 
cgit 


From 1fd16f36be687ce32e875f59363c16d7cf050988 Mon Sep 17 00:00:00 2001
From: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Date: Mon, 9 Oct 2017 16:35:12 -0400
Subject: drm/amd/amdgpu: Added asic_type as ACP DMA driver platform data

asic_type information is passed to ACP DMA Driver as platform data.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index a52795d9b458..ebca22302ebb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -371,6 +371,8 @@ static int acp_hw_init(void *handle)
 	adev->acp.acp_cell[0].name = "acp_audio_dma";
 	adev->acp.acp_cell[0].num_resources = 4;
 	adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+	adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+	adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
 
 	adev->acp.acp_cell[1].name = "designware-i2s";
 	adev->acp.acp_cell[1].num_resources = 1;
-- 
cgit 


From 8bc4c256f4995d315eb9cce6e47b4885c79ff661 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Fri, 13 Oct 2017 14:58:14 -0400
Subject: drm/amdgpu: rename context priority levels

Don't leak implementation details about how each priority behaves to
usermode. This allows greater flexibility in the future.

Squash into c2636dc53abd8269a0930bccd564f2f195dba729

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index cd123306eda7..290cc3f9c433 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -32,14 +32,14 @@
 enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
 {
 	switch (amdgpu_priority) {
-	case AMDGPU_CTX_PRIORITY_HIGH_HW:
+	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
 		return AMD_SCHED_PRIORITY_HIGH_HW;
-	case AMDGPU_CTX_PRIORITY_HIGH_SW:
+	case AMDGPU_CTX_PRIORITY_HIGH:
 		return AMD_SCHED_PRIORITY_HIGH_SW;
 	case AMDGPU_CTX_PRIORITY_NORMAL:
 		return AMD_SCHED_PRIORITY_NORMAL;
-	case AMDGPU_CTX_PRIORITY_LOW_SW:
-	case AMDGPU_CTX_PRIORITY_LOW_HW:
+	case AMDGPU_CTX_PRIORITY_LOW:
+	case AMDGPU_CTX_PRIORITY_VERY_LOW:
 		return AMD_SCHED_PRIORITY_LOW;
 	case AMDGPU_CTX_PRIORITY_UNSET:
 		return AMD_SCHED_PRIORITY_UNSET;
-- 
cgit 


From b285f1db6b3d7bf1c088c3700ffc54b8e676bcea Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 9 Oct 2017 16:28:16 -0400
Subject: drm/amdgpu: bump version for new AMDGPU_SCHED ioctl

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ad02d3fbb44c..b9a32585f58d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -71,9 +71,10 @@
  * - 3.19.0 - Add support for UVD MJPEG decode
  * - 3.20.0 - Add support for local BOs
  * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
+ * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	21
+#define KMS_DRIVER_MINOR	22
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
cgit 


From 202f5d6e1a5962e3acb21c50cd9b4e3ab5990dab Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 9 Oct 2017 13:50:31 +0800
Subject: drm/amdgpu: refine code delete duplicated error handling

in function amdgpu_ucode_init_bo, when failed, it will
set load_type to AMDGPU_FW_LOAD_DIRECT.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 412ad99d8871..1f793eb301ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1711,8 +1711,8 @@ static int amdgpu_init(struct amdgpu_device *adev)
 	}
 
 	mutex_lock(&adev->firmware.mutex);
-	if (amdgpu_ucode_init_bo(adev))
-		adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
+		amdgpu_ucode_init_bo(adev);
 	mutex_unlock(&adev->firmware.mutex);
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
-- 
cgit 


From f15507a1ac0dcdbda0c6c4fe4dc168bfe0034535 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 11 Oct 2017 10:21:11 +0100
Subject: drm/amdgpu: make function uvd_v6_0_enc_get_destroy_msg static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function uvd_v6_0_enc_get_destroy_msg is local to the source and
does not need to be in global scope, so make it static.

Cleans up sparse warning:
symbol 'uvd_v6_0_enc_get_destroy_msg' was not declared. Should it be
static?

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 60af7310a234..71299c67c517 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -268,8 +268,9 @@ err:
  *
  * Close up a stream for HW test or if userspace failed to do so
  */
-int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-				 bool direct, struct dma_fence **fence)
+static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
+					uint32_t handle,
+					bool direct, struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 16;
 	struct amdgpu_job *job;
-- 
cgit 


From ad864d243826cedc53404a1c0db7d1e38ddceb84 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Tue, 10 Oct 2017 16:50:16 -0400
Subject: drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This enables old fence waiting before reservation lock is aquired
which in turn is part of a bigger solution to deadlock happening
when gpu reset with VRAM recovery accures during intensive rendering.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 112 ++++++++++++++++++---------------
 1 file changed, 61 insertions(+), 51 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fe7dd44ac9fe..9166d5e1e557 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -845,15 +845,60 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_ring *ring = p->job->ring;
-	int i, r;
+	int i, j, r;
+
+	for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+
+		struct amdgpu_cs_chunk *chunk;
+		struct amdgpu_ib *ib;
+		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+
+		chunk = &p->chunks[i];
+		ib = &p->job->ibs[j];
+		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
+
+		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+			continue;
+
+		if (p->job->ring->funcs->parse_cs) {
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			uint64_t offset;
+			uint8_t *kptr;
+
+			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
+					&aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
 
-	/* Only for UVD/VCE VM emulation */
-	if (ring->funcs->parse_cs) {
-		for (i = 0; i < p->job->num_ibs; i++) {
-			r = amdgpu_ring_parse_cs(ring, p, i);
+			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
+				(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += chunk_ib->va_start - offset;
+
+			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+			amdgpu_bo_kunmap(aobj);
+
+			/* Only for UVD/VCE VM emulation */
+			r = amdgpu_ring_parse_cs(ring, p, j);
 			if (r)
 				return r;
+
 		}
+
+		j++;
 	}
 
 	if (p->job->vm) {
@@ -919,54 +964,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
 		parser->job->ring = ring;
 
-		if (ring->funcs->parse_cs) {
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			uint64_t offset;
-			uint8_t *kptr;
-
-			r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
-						   &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += chunk_ib->va_start - offset;
-
-			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
-		} else {
-			r =  amdgpu_ib_get(adev, vm, 0, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
+		r =  amdgpu_ib_get(adev, vm,
+					ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
+					ib);
+		if (r) {
+			DRM_ERROR("Failed to get ib !\n");
+			return r;
 		}
 
 		ib->gpu_addr = chunk_ib->va_start;
 		ib->length_dw = chunk_ib->ib_bytes / 4;
 		ib->flags = chunk_ib->flags;
+
 		j++;
 	}
 
@@ -1212,6 +1221,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		goto out;
 	}
 
+	r = amdgpu_cs_ib_fill(adev, &parser);
+	if (r)
+		goto out;
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1222,9 +1235,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	}
 
 	reserved_buffers = true;
-	r = amdgpu_cs_ib_fill(adev, &parser);
-	if (r)
-		goto out;
 
 	r = amdgpu_cs_dependencies(adev, &parser);
 	if (r) {
-- 
cgit 


From 0ae94444c08a0adf2fab4aab26be0646ee445a19 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Tue, 10 Oct 2017 16:50:17 -0400
Subject: drm/amdgpu: Move old fence waiting before reservation lock is aquired
 v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Helps avoiding deadlock during GPU reset.
Added mutex to amdgpu_ctx to preserve order of fences on a ring.

v2:
Put waiting logic in a function in a seperate function in amdgpu_ctx.c

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------
 3 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 951c8db01412..76033e2cdba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -738,6 +738,7 @@ struct amdgpu_ctx {
 	bool 			preamble_presented;
 	enum amd_sched_priority init_priority;
 	enum amd_sched_priority override_priority;
+	struct mutex            lock;
 };
 
 struct amdgpu_ctx_mgr {
@@ -760,9 +761,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+
 /*
  * file private structure
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9166d5e1e557..5de092eab0fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_chunk;
 	}
 
+	mutex_lock(&p->ctx->lock);
+
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
@@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
@@ -895,9 +899,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 			r = amdgpu_ring_parse_cs(ring, p, j);
 			if (r)
 				return r;
-
 		}
-
 		j++;
 	}
 
@@ -985,7 +987,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return 0;
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a78b03f65c69..4309820658c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	if (!ctx->fences)
 		return -ENOMEM;
 
+	mutex_init(&ctx->lock);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
@@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 				      &ctx->rings[i].entity);
 
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+	mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
-	if (other) {
-		signed long r;
-		r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
-		if (r < 0)
-			return r;
-	}
+	if (other)
+		BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
@@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 	}
 }
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+{
+	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
+	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
+	struct dma_fence *other = cring->fences[idx];
+
+	if (other) {
+		signed long r;
+		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		if (r < 0) {
+			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+			return r;
+		}
+	}
+
+	return 0;
+}
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 {
 	mutex_init(&mgr->lock);
-- 
cgit 


From 2dc8f81e4f822cfe8f6475da968ab2dd5881b8d8 Mon Sep 17 00:00:00 2001
From: Horace Chen <horace.chen@amd.com>
Date: Mon, 9 Oct 2017 16:17:16 +0800
Subject: drm/amdgpu: SR-IOV data exchange between PF&VF

SR-IOV need to exchange some data between PF&VF through shared VRAM

PF will copy some necessary firmware and information to the shared
VRAM. It also requires some information from VF. PF will send a
key through mailbox2 to help guest calculate checksum so that it can
verify whether the data is correct.

So check the data on the specified offset of the shared VRAM, if the
checksum is right, read values from it and write some VF information
next to the data from PF.

Signed-off-by: Horace Chen <horace.chen@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   |  75 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 178 +++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c      |   6 +
 4 files changed, 262 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1f793eb301ef..9cdaba4af216 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2300,6 +2300,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 		DRM_ERROR("ib ring test failed (%d).\n", r);
 
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_init_data_exchange(adev);
+
 	amdgpu_fbdev_init(adev);
 
 	r = amdgpu_pm_sysfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index ab05121b9272..ed7be2eb24b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -274,3 +274,78 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
 			      (void *)&adev->virt.mm_table.cpu_addr);
 	adev->virt.mm_table.gpu_addr = 0;
 }
+
+
+int amdgpu_virt_fw_reserve_get_checksum(void *obj,
+					unsigned long obj_size,
+					unsigned int key,
+					unsigned int chksum)
+{
+	unsigned int ret = key;
+	unsigned long i = 0;
+	unsigned char *pos;
+
+	pos = (char *)obj;
+	/* calculate checksum */
+	for (i = 0; i < obj_size; ++i)
+		ret += *(pos + i);
+	/* minus the chksum itself */
+	pos = (char *)&chksum;
+	for (i = 0; i < sizeof(chksum); ++i)
+		ret -= *(pos + i);
+	return ret;
+}
+
+void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+{
+	uint32_t pf2vf_ver = 0;
+	uint32_t pf2vf_size = 0;
+	uint32_t checksum = 0;
+	uint32_t checkval;
+	char *str;
+
+	adev->virt.fw_reserve.p_pf2vf = NULL;
+	adev->virt.fw_reserve.p_vf2pf = NULL;
+
+	if (adev->fw_vram_usage.va != NULL) {
+		adev->virt.fw_reserve.p_pf2vf =
+			(struct amdgim_pf2vf_info_header *)(
+			adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET);
+		pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version;
+		AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size);
+		AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum);
+
+		/* pf2vf message must be in 4K */
+		if (pf2vf_size > 0 && pf2vf_size < 4096) {
+			checkval = amdgpu_virt_fw_reserve_get_checksum(
+				adev->virt.fw_reserve.p_pf2vf, pf2vf_size,
+				adev->virt.fw_reserve.checksum_key, checksum);
+			if (checkval == checksum) {
+				adev->virt.fw_reserve.p_vf2pf =
+					((void *)adev->virt.fw_reserve.p_pf2vf +
+					pf2vf_size);
+				memset((void *)adev->virt.fw_reserve.p_vf2pf, 0,
+					sizeof(amdgim_vf2pf_info));
+				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.version,
+					AMDGPU_FW_VRAM_VF2PF_VER);
+				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size,
+					sizeof(amdgim_vf2pf_info));
+				AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version,
+					&str);
+				if (THIS_MODULE->version != NULL)
+					strcpy(str, THIS_MODULE->version);
+				else
+					strcpy(str, "N/A");
+				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert,
+					0);
+				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum,
+					amdgpu_virt_fw_reserve_get_checksum(
+					adev->virt.fw_reserve.p_vf2pf,
+					pf2vf_size,
+					adev->virt.fw_reserve.checksum_key, 0));
+			}
+		}
+	}
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index e5fd0ff6b29d..b89d37fc406f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -58,6 +58,179 @@ struct amdgpu_virt_ops {
 	void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
 };
 
+/*
+ * Firmware Reserve Frame buffer
+ */
+struct amdgpu_virt_fw_reserve {
+	struct amdgim_pf2vf_info_header *p_pf2vf;
+	struct amdgim_vf2pf_info_header *p_vf2pf;
+	unsigned int checksum_key;
+};
+/*
+ * Defination between PF and VF
+ * Structures forcibly aligned to 4 to keep the same style as PF.
+ */
+#define AMDGIM_DATAEXCHANGE_OFFSET		(64 * 1024)
+
+#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \
+		(total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2))
+
+enum AMDGIM_FEATURE_FLAG {
+	/* GIM supports feature of Error log collecting */
+	AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1,
+	/* GIM supports feature of loading uCodes */
+	AMDGIM_FEATURE_GIM_LOAD_UCODES   = 0x2,
+};
+
+struct amdgim_pf2vf_info_header {
+	/* the total structure size in byte. */
+	uint32_t size;
+	/* version of this structure, written by the GIM */
+	uint32_t version;
+} __aligned(4);
+struct  amdgim_pf2vf_info_v1 {
+	/* header contains size and version */
+	struct amdgim_pf2vf_info_header header;
+	/* max_width * max_height */
+	unsigned int uvd_enc_max_pixels_count;
+	/* 16x16 pixels/sec, codec independent */
+	unsigned int uvd_enc_max_bandwidth;
+	/* max_width * max_height */
+	unsigned int vce_enc_max_pixels_count;
+	/* 16x16 pixels/sec, codec independent */
+	unsigned int vce_enc_max_bandwidth;
+	/* MEC FW position in kb from the start of visible frame buffer */
+	unsigned int mecfw_kboffset;
+	/* The features flags of the GIM driver supports. */
+	unsigned int feature_flags;
+	/* use private key from mailbox 2 to create chueksum */
+	unsigned int checksum;
+} __aligned(4);
+
+struct  amdgim_pf2vf_info_v2 {
+	/* header contains size and version */
+	struct amdgim_pf2vf_info_header header;
+	/* use private key from mailbox 2 to create chueksum */
+	uint32_t checksum;
+	/* The features flags of the GIM driver supports. */
+	uint32_t feature_flags;
+	/* max_width * max_height */
+	uint32_t uvd_enc_max_pixels_count;
+	/* 16x16 pixels/sec, codec independent */
+	uint32_t uvd_enc_max_bandwidth;
+	/* max_width * max_height */
+	uint32_t vce_enc_max_pixels_count;
+	/* 16x16 pixels/sec, codec independent */
+	uint32_t vce_enc_max_bandwidth;
+	/* MEC FW position in kb from the start of VF visible frame buffer */
+	uint64_t mecfw_kboffset;
+	/* MEC FW size in KB */
+	uint32_t mecfw_ksize;
+	/* UVD FW position in kb from the start of VF visible frame buffer */
+	uint64_t uvdfw_kboffset;
+	/* UVD FW size in KB */
+	uint32_t uvdfw_ksize;
+	/* VCE FW position in kb from the start of VF visible frame buffer */
+	uint64_t vcefw_kboffset;
+	/* VCE FW size in KB */
+	uint32_t vcefw_ksize;
+	uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)];
+} __aligned(4);
+
+
+struct amdgim_vf2pf_info_header {
+	/* the total structure size in byte. */
+	uint32_t size;
+	/*version of this structure, written by the guest */
+	uint32_t version;
+} __aligned(4);
+
+struct amdgim_vf2pf_info_v1 {
+	/* header contains size and version */
+	struct amdgim_vf2pf_info_header header;
+	/* driver version */
+	char driver_version[64];
+	/* driver certification, 1=WHQL, 0=None */
+	unsigned int driver_cert;
+	/* guest OS type and version: need a define */
+	unsigned int os_info;
+	/* in the unit of 1M */
+	unsigned int fb_usage;
+	/* guest gfx engine usage percentage */
+	unsigned int gfx_usage;
+	/* guest gfx engine health percentage */
+	unsigned int gfx_health;
+	/* guest compute engine usage percentage */
+	unsigned int compute_usage;
+	/* guest compute engine health percentage */
+	unsigned int compute_health;
+	/* guest vce engine usage percentage. 0xffff means N/A. */
+	unsigned int vce_enc_usage;
+	/* guest vce engine health percentage. 0xffff means N/A. */
+	unsigned int vce_enc_health;
+	/* guest uvd engine usage percentage. 0xffff means N/A. */
+	unsigned int uvd_enc_usage;
+	/* guest uvd engine usage percentage. 0xffff means N/A. */
+	unsigned int uvd_enc_health;
+	unsigned int checksum;
+} __aligned(4);
+
+struct amdgim_vf2pf_info_v2 {
+	/* header contains size and version */
+	struct amdgim_vf2pf_info_header header;
+	uint32_t checksum;
+	/* driver version */
+	uint8_t driver_version[64];
+	/* driver certification, 1=WHQL, 0=None */
+	uint32_t driver_cert;
+	/* guest OS type and version: need a define */
+	uint32_t os_info;
+	/* in the unit of 1M */
+	uint32_t fb_usage;
+	/* guest gfx engine usage percentage */
+	uint32_t gfx_usage;
+	/* guest gfx engine health percentage */
+	uint32_t gfx_health;
+	/* guest compute engine usage percentage */
+	uint32_t compute_usage;
+	/* guest compute engine health percentage */
+	uint32_t compute_health;
+	/* guest vce engine usage percentage. 0xffff means N/A. */
+	uint32_t vce_enc_usage;
+	/* guest vce engine health percentage. 0xffff means N/A. */
+	uint32_t vce_enc_health;
+	/* guest uvd engine usage percentage. 0xffff means N/A. */
+	uint32_t uvd_enc_usage;
+	/* guest uvd engine usage percentage. 0xffff means N/A. */
+	uint32_t uvd_enc_health;
+	uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)];
+} __aligned(4);
+
+#define AMDGPU_FW_VRAM_VF2PF_VER 2
+typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
+
+#define AMDGPU_FW_VRAM_VF2PF_WRITE(adev, field, val) \
+	do { \
+		((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field = (val); \
+	} while (0)
+
+#define AMDGPU_FW_VRAM_VF2PF_READ(adev, field, val) \
+	do { \
+		(*val) = ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field; \
+	} while (0)
+
+#define AMDGPU_FW_VRAM_PF2VF_READ(adev, field, val) \
+	do { \
+		if (!adev->virt.fw_reserve.p_pf2vf) \
+			*(val) = 0; \
+		else { \
+			if (adev->virt.fw_reserve.p_pf2vf->version == 1) \
+				*(val) = ((struct amdgim_pf2vf_info_v1 *)adev->virt.fw_reserve.p_pf2vf)->field; \
+			if (adev->virt.fw_reserve.p_pf2vf->version == 2) \
+				*(val) = ((struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf)->field; \
+		} \
+	} while (0)
+
 /* GPU virtualization */
 struct amdgpu_virt {
 	uint32_t			caps;
@@ -72,6 +245,7 @@ struct amdgpu_virt {
 	struct amdgpu_mm_table		mm_table;
 	const struct amdgpu_virt_ops	*ops;
 	struct amdgpu_vf_error_buffer   vf_errors;
+	struct amdgpu_virt_fw_reserve	fw_reserve;
 };
 
 #define AMDGPU_CSA_SIZE    (8 * 1024)
@@ -114,5 +288,9 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
 int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
 int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
+					unsigned int key,
+					unsigned int chksum);
+void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 2812d88a8bdd..b4906d2f30d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -183,6 +183,12 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
 			return r;
 		}
+		/* Retrieve checksum from mailbox2 */
+		if (req == IDH_REQ_GPU_INIT_ACCESS) {
+			adev->virt.fw_reserve.checksum_key =
+				RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+					mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
+		}
 	}
 
 	return 0;
-- 
cgit 


From 76d6172b6fab16455af4b67bb18a3f66011592f8 Mon Sep 17 00:00:00 2001
From: Ken Wang <Ken.Wang@amd.com>
Date: Fri, 29 Sep 2017 15:41:43 +0800
Subject: drm/amdgpu: correct reference clock value on vega10

Old value from bringup was wrong.

Cc: stable@vger.kernel.org
Signed-off-by: Ken Wang <Ken.Wang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 1c006ba9d826..3ca9d114f630 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -279,10 +279,7 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev)
 }
 static u32 soc15_get_xclk(struct amdgpu_device *adev)
 {
-	if (adev->asic_type == CHIP_VEGA10)
-		return adev->clock.spll.reference_freq/4;
-	else
-		return adev->clock.spll.reference_freq;
+	return adev->clock.spll.reference_freq;
 }
 
 
-- 
cgit 


From 396bcb41e035df7b98fb150ca950bf213e70ae7b Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 9 Oct 2017 14:45:09 +0200
Subject: drm/amdgpu: partial revert VRAM lost handling v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Keep blocking the CS, but revert everything else. Mapping BOs and info IOCTL
are harmless and can still happen even when VRAM content ist lost.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 11 -----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  5 -----
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 ++++++++++----
 3 files changed, 10 insertions(+), 20 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5de092eab0fa..0c07df72743c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1272,16 +1272,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
 
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;
@@ -1350,16 +1346,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_fence_to_handle *info = data;
 	struct dma_fence *fence;
 	struct drm_syncobj *syncobj;
 	struct sync_file *sync_file;
 	int fd, r;
 
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
-
 	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
@@ -1521,15 +1513,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_wait_fences *wait = data;
 	uint32_t fence_count = wait->in.fence_count;
 	struct drm_amdgpu_fence *fences_user;
 	struct drm_amdgpu_fence *fences;
 	int r;
 
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
 	/* Get the fences from userspace */
 	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
 			GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 21e99366cab3..fb72edc4c026 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -579,11 +579,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 			args->operation);
 		return -EINVAL;
 	}
-	if ((args->operation == AMDGPU_VA_OP_MAP) ||
-	    (args->operation == AMDGPU_VA_OP_REPLACE)) {
-		if (amdgpu_kms_vram_lost(adev, fpriv))
-			return -ENODEV;
-	}
 
 	INIT_LIST_HEAD(&list);
 	INIT_LIST_HEAD(&duplicates);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 82e8d43b235a..f759836d10ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -270,7 +270,6 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct drm_amdgpu_info *info = data;
 	struct amdgpu_mode_info *minfo = &adev->mode_info;
 	void __user *out = (void __user *)(uintptr_t)info->return_pointer;
@@ -283,8 +282,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 
 	if (!info->return_size || !info->return_pointer)
 		return -EINVAL;
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
 
 	switch (info->query) {
 	case AMDGPU_INFO_ACCEL_WORKING:
@@ -792,10 +789,19 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
 	vga_switcheroo_process_delayed_switch();
 }
 
+/**
+ * amdgpu_kms_vram_lost - check if VRAM was lost for this client
+ *
+ * @adev: amdgpu device
+ * @fpriv: client private
+ *
+ * Check if all CS is blocked for the client because of lost VRAM
+ */
 bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
 			  struct amdgpu_fpriv *fpriv)
 {
-	return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
+	return fpriv->vram_lost_counter !=
+		atomic_read(&adev->vram_lost_counter);
 }
 
 /**
-- 
cgit 


From 14e47f93c5cc4a1237dbacc137e174706093b69c Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 9 Oct 2017 15:04:41 +0200
Subject: drm/amdgpu: keep copy of VRAM lost counter in job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of reading the current counter from fpriv.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 +++++++------
 3 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 76033e2cdba8..aa70f8c045b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1125,6 +1125,7 @@ struct amdgpu_job {
 	uint32_t		gds_base, gds_size;
 	uint32_t		gws_base, gws_size;
 	uint32_t		oa_base, oa_size;
+	uint32_t		vram_lost_counter;
 
 	/* user fence handling */
 	uint64_t		uf_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 0c07df72743c..9daa7cac0ffb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -172,6 +172,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;
 
+	p->job->vram_lost_counter = fpriv->vram_lost_counter;
+
 	if (p->uf_entry.robj)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 83d13431cbdd..4f2b5acc8743 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -61,6 +61,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
+	(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 
 	amdgpu_sync_create(&(*job)->sync);
 	amdgpu_sync_create(&(*job)->dep_sync);
@@ -180,8 +181,8 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 {
 	struct dma_fence *fence = NULL;
+	struct amdgpu_device *adev;
 	struct amdgpu_job *job;
-	struct amdgpu_fpriv *fpriv = NULL;
 	int r;
 
 	if (!sched_job) {
@@ -189,17 +190,17 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 		return NULL;
 	}
 	job = to_amdgpu_job(sched_job);
+	adev = job->adev;
 
 	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
 
 	trace_amdgpu_sched_run_job(job);
-	if (job->vm)
-		fpriv = container_of(job->vm, struct amdgpu_fpriv, vm);
 	/* skip ib schedule when vram is lost */
-	if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv))
+	if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) {
 		DRM_ERROR("Skip scheduling IBs!\n");
-	else {
-		r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
+	} else {
+		r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job,
+				       &fence);
 		if (r)
 			DRM_ERROR("Error scheduling IBs (%d)\n", r);
 	}
-- 
cgit 


From e55f2b646df3318e24f12b8388ab6e5cccb3e92d Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 9 Oct 2017 15:18:43 +0200
Subject: drm/amdgpu: move the VRAM lost counter per context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of per device track the VRAM lost per context and return ECANCELED
instead of ENODEV.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  6 ++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  9 +++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 ----------------
 4 files changed, 8 insertions(+), 24 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index aa70f8c045b1..67b864436be1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -732,10 +732,11 @@ struct amdgpu_ctx {
 	struct amdgpu_device    *adev;
 	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
+	uint32_t		vram_lost_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
-	bool 			preamble_presented;
+	bool			preamble_presented;
 	enum amd_sched_priority init_priority;
 	enum amd_sched_priority override_priority;
 	struct mutex            lock;
@@ -778,7 +779,6 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
-	u32			vram_lost_counter;
 };
 
 /*
@@ -1860,8 +1860,6 @@ static inline bool amdgpu_has_atpx(void) { return false; }
 extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
 extern const int amdgpu_max_kms_ioctl;
 
-bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
-			  struct amdgpu_fpriv *fpriv);
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void amdgpu_driver_unload_kms(struct drm_device *dev);
 void amdgpu_driver_lastclose_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9daa7cac0ffb..b355189533d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -172,7 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;
 
-	p->job->vram_lost_counter = fpriv->vram_lost_counter;
+	p->job->vram_lost_counter = atomic_read(&p->adev->vram_lost_counter);
+	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+		ret = -ECANCELED;
+		goto free_all_kdata;
+	}
 
 	if (p->uf_entry.robj)
 		p->job->uf_addr = uf_offset;
@@ -1205,7 +1209,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
@@ -1213,8 +1216,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	if (!adev->accel_working)
 		return -EBUSY;
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
 
 	parser.adev = adev;
 	parser.filp = filp;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 4309820658c4..c184468e2b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -75,6 +75,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	}
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	ctx->init_priority = priority;
 	ctx->override_priority = AMD_SCHED_PRIORITY_UNSET;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index f759836d10ef..ff1a416a66c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -789,21 +789,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
 	vga_switcheroo_process_delayed_switch();
 }
 
-/**
- * amdgpu_kms_vram_lost - check if VRAM was lost for this client
- *
- * @adev: amdgpu device
- * @fpriv: client private
- *
- * Check if all CS is blocked for the client because of lost VRAM
- */
-bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
-			  struct amdgpu_fpriv *fpriv)
-{
-	return fpriv->vram_lost_counter !=
-		atomic_read(&adev->vram_lost_counter);
-}
-
 /**
  * amdgpu_driver_open_kms - drm callback for open
  *
@@ -860,7 +845,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
-	fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	file_priv->driver_priv = fpriv;
 
 out_suspend:
-- 
cgit 


From 7a0a48ddf63bc9944b9690c6fa043ea4305f7f79 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 9 Oct 2017 15:51:10 +0200
Subject: drm/amdgpu: set -ECANCELED when dropping jobs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

And return from the wait functions the fence error code.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 7 ++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b355189533d2..2ae5d523ca10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1298,6 +1298,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 		r = PTR_ERR(fence);
 	else if (fence) {
 		r = dma_fence_wait_timeout(fence, true, timeout);
+		if (r > 0 && fence->error)
+			r = fence->error;
 		dma_fence_put(fence);
 	} else
 		r = 1;
@@ -1435,6 +1437,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
 
 		if (r == 0)
 			break;
+
+		if (fence->error)
+			return fence->error;
 	}
 
 	memset(wait, 0, sizeof(*wait));
@@ -1495,7 +1500,7 @@ out:
 	wait->out.status = (r > 0);
 	wait->out.first_signaled = first;
 	/* set return value 0 to indicate success */
-	r = 0;
+	r = array[first]->error;
 
 err_free_fence_array:
 	for (i = 0; i < fence_count; i++)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4f2b5acc8743..a8357885776e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -197,6 +197,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	trace_amdgpu_sched_run_job(job);
 	/* skip ib schedule when vram is lost */
 	if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) {
+		dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED);
 		DRM_ERROR("Skip scheduling IBs!\n");
 	} else {
 		r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job,
-- 
cgit 


From 1f7251b73e08395dbf03602a57ca67cf7da7f9db Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 9 Oct 2017 17:53:06 +0200
Subject: drm/amdgpu: add VRAM lost query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allows userspace to figure out if VRAM was lost.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index ff1a416a66c9..6f0b26dae3b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -763,6 +763,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		}
 		return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
 	}
+	case AMDGPU_INFO_VRAM_LOST_COUNTER:
+		ui32 = atomic_read(&adev->vram_lost_counter);
+		return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->query);
 		return -EINVAL;
-- 
cgit 


From 26eedf6daec4e7937c8f0f1dde5e9b8e3dcebfd3 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
Date: Wed, 11 Oct 2017 17:02:02 -0400
Subject: drm/amdgpu: Fix extra call to amdgpu_ctx_put.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In amdgpu_cs_parser_init() in case of error handling
amdgpu_ctx_put() is called without setting p->ctx to NULL after that,
later amdgpu_cs_parser_fini() also calls amdgpu_ctx_put() again and
mess up the reference count.

Signed-off-by: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2ae5d523ca10..dfd37785563f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -97,7 +97,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
 		ret = -EFAULT;
-		goto put_ctx;
+		goto free_chunk;
 	}
 
 	p->nchunks = cs->in.num_chunks;
@@ -105,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			    GFP_KERNEL);
 	if (!p->chunks) {
 		ret = -ENOMEM;
-		goto put_ctx;
+		goto free_chunk;
 	}
 
 	for (i = 0; i < p->nchunks; i++) {
@@ -191,8 +191,6 @@ free_partial_kdata:
 	kfree(p->chunks);
 	p->chunks = NULL;
 	p->nchunks = 0;
-put_ctx:
-	amdgpu_ctx_put(p->ctx);
 free_chunk:
 	kfree(chunk_array);
 
-- 
cgit 


From 1eca5a530dc3ac0a2fadd21da1c9e6c729a4a2a1 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Tue, 3 Oct 2017 15:41:56 -0400
Subject: drm/amdgpu: Refactor amdgpu_move_blit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports
arbitrary copy size, offsets and two BOs (source & dest.).

This is useful for KFD Cross Memory Attach feature where data needs to
be copied from BOs from different processes

v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem()
function parameters to use the struct

v3: Minor function name change

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  12 +++
 2 files changed, 132 insertions(+), 49 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a2282bacf960..382c0ae0561e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -290,97 +290,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 	return addr;
 }
 
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-			    bool evict, bool no_wait_gpu,
-			    struct ttm_mem_reg *new_mem,
-			    struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+			       struct amdgpu_copy_mem *src,
+			       struct amdgpu_copy_mem *dst,
+			       uint64_t size,
+			       struct reservation_object *resv,
+			       struct dma_fence **f)
 {
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
-	struct drm_mm_node *old_mm, *new_mm;
-	uint64_t old_start, old_size, new_start, new_size;
-	unsigned long num_pages;
+	struct drm_mm_node *src_mm, *dst_mm;
+	uint64_t src_node_start, dst_node_start, src_node_size,
+		 dst_node_size, src_page_offset, dst_page_offset;
 	struct dma_fence *fence = NULL;
-	int r;
-
-	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+	int r = 0;
+	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+					AMDGPU_GPU_PAGE_SIZE);
 
 	if (!ring->ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 	}
 
-	old_mm = old_mem->mm_node;
-	old_size = old_mm->size;
-	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+	src_mm = src->mem->mm_node;
+	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
+		src->offset -= (src_mm->size << PAGE_SHIFT);
+		++src_mm;
+	}
+	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+					     src->offset;
+	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+	src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-	new_mm = new_mem->mm_node;
-	new_size = new_mm->size;
-	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+	dst_mm = dst->mem->mm_node;
+	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
+		dst->offset -= (dst_mm->size << PAGE_SHIFT);
+		++dst_mm;
+	}
+	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+					     dst->offset;
+	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 
-	num_pages = new_mem->num_pages;
 	mutex_lock(&adev->mman.gtt_window_lock);
-	while (num_pages) {
-		unsigned long cur_pages = min(min(old_size, new_size),
-					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-		uint64_t from = old_start, to = new_start;
+
+	while (size) {
+		unsigned long cur_size;
+		uint64_t from = src_node_start, to = dst_node_start;
 		struct dma_fence *next;
 
-		if (old_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-					      old_start, 0, ring, &from);
+		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+		 * begins at an offset, then adjust the size accordingly
+		 */
+		cur_size = min3(min(src_node_size, dst_node_size), size,
+				GTT_MAX_BYTES);
+		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+		    cur_size + dst_page_offset > GTT_MAX_BYTES)
+			cur_size -= max(src_page_offset, dst_page_offset);
+
+		/* Map only what needs to be accessed. Map src to window 0 and
+		 * dst to window 1
+		 */
+		if (src->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+			r = amdgpu_map_buffer(src->bo, src->mem,
+					PFN_UP(cur_size + src_page_offset),
+					src_node_start, 0, ring,
+					&from);
 			if (r)
 				goto error;
+			/* Adjust the offset because amdgpu_map_buffer returns
+			 * start of mapped page
+			 */
+			from += src_page_offset;
 		}
 
-		if (new_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-					      new_start, 1, ring, &to);
+		if (dst->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+			r = amdgpu_map_buffer(dst->bo, dst->mem,
+					PFN_UP(cur_size + dst_page_offset),
+					dst_node_start, 1, ring,
+					&to);
 			if (r)
 				goto error;
+			to += dst_page_offset;
 		}
 
-		r = amdgpu_copy_buffer(ring, from, to,
-				       cur_pages * PAGE_SIZE,
-				       bo->resv, &next, false, true);
+		r = amdgpu_copy_buffer(ring, from, to, cur_size,
+				       resv, &next, false, true);
 		if (r)
 			goto error;
 
 		dma_fence_put(fence);
 		fence = next;
 
-		num_pages -= cur_pages;
-		if (!num_pages)
+		size -= cur_size;
+		if (!size)
 			break;
 
-		old_size -= cur_pages;
-		if (!old_size) {
-			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-			old_size = old_mm->size;
+		src_node_size -= cur_size;
+		if (!src_node_size) {
+			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+							     src->mem);
+			src_node_size = (src_mm->size << PAGE_SHIFT);
 		} else {
-			old_start += cur_pages * PAGE_SIZE;
+			src_node_start += cur_size;
+			src_page_offset = src_node_start & (PAGE_SIZE - 1);
 		}
-
-		new_size -= cur_pages;
-		if (!new_size) {
-			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-			new_size = new_mm->size;
+		dst_node_size -= cur_size;
+		if (!dst_node_size) {
+			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+							     dst->mem);
+			dst_node_size = (dst_mm->size << PAGE_SHIFT);
 		} else {
-			new_start += cur_pages * PAGE_SIZE;
+			dst_node_start += cur_size;
+			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 		}
 	}
+error:
 	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (f)
+		*f = dma_fence_get(fence);
+	dma_fence_put(fence);
+	return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+			    bool evict, bool no_wait_gpu,
+			    struct ttm_mem_reg *new_mem,
+			    struct ttm_mem_reg *old_mem)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	struct amdgpu_copy_mem src, dst;
+	struct dma_fence *fence = NULL;
+	int r;
+
+	src.bo = bo;
+	dst.bo = bo;
+	src.mem = old_mem;
+	dst.mem = new_mem;
+	src.offset = 0;
+	dst.offset = 0;
+
+	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+				       new_mem->num_pages << PAGE_SHIFT,
+				       bo->resv, &fence);
+	if (r)
+		goto error;
 
 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	dma_fence_put(fence);
 	return r;
 
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
-
 	if (fence)
 		dma_fence_wait(fence, false);
 	dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 7abae6867339..abd4084982a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -58,6 +58,12 @@ struct amdgpu_mman {
 	struct amd_sched_entity			entity;
 };
 
+struct amdgpu_copy_mem {
+	struct ttm_buffer_object	*bo;
+	struct ttm_mem_reg		*mem;
+	unsigned long			offset;
+};
+
 extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
 extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
 
@@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       struct reservation_object *resv,
 		       struct dma_fence **fence, bool direct_submit,
 		       bool vm_needs_flush);
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+			       struct amdgpu_copy_mem *src,
+			       struct amdgpu_copy_mem *dst,
+			       uint64_t size,
+			       struct reservation_object *resv,
+			       struct dma_fence **f);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint64_t src_data,
 			struct reservation_object *resv,
-- 
cgit 


From e1d515052f9075eb1b791b21467d79db3529db83 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Fri, 6 Oct 2017 17:36:35 -0400
Subject: drm/amdgpu: Add amdgpu_find_mm_node()

Replace some commonly repeated code with a function.

v2: Use amdgpu_find_mm_node() in amdgpu_ttm_io_mem_pfn()

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 49 ++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 22 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 382c0ae0561e..51eacefadea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -291,7 +291,24 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 }
 
 /**
- * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ *  corresponding to @offset. It also modifies the offset to be
+ *  within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+					       unsigned long *offset)
+{
+	struct drm_mm_node *mm_node = mem->mm_node;
+
+	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+		*offset -= (mm_node->size << PAGE_SHIFT);
+		++mm_node;
+	}
+	return mm_node;
+}
+
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
  *
  * The function copies @size bytes from {src->mem + src->offset} to
  * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
@@ -320,21 +337,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
-	src_mm = src->mem->mm_node;
-	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
-		src->offset -= (src_mm->size << PAGE_SHIFT);
-		++src_mm;
-	}
+	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
 	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
 					     src->offset;
 	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
 	src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-	dst_mm = dst->mem->mm_node;
-	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
-		dst->offset -= (dst_mm->size << PAGE_SHIFT);
-		++dst_mm;
-	}
+	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
 	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
 					     dst->offset;
 	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
@@ -654,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 					   unsigned long page_offset)
 {
-	struct drm_mm_node *mm = bo->mem.mm_node;
-	uint64_t size = mm->size;
-	uint64_t offset = page_offset;
+	struct drm_mm_node *mm;
+	unsigned long offset = (page_offset << PAGE_SHIFT);
 
-	page_offset = do_div(offset, size);
-	mm += offset;
-	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+	mm = amdgpu_find_mm_node(&bo->mem, &offset);
+	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
+		(offset >> PAGE_SHIFT);
 }
 
 /*
@@ -1216,7 +1224,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
-	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	struct drm_mm_node *nodes;
 	uint32_t value = 0;
 	int ret = 0;
 	uint64_t pos;
@@ -1225,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	if (bo->mem.mem_type != TTM_PL_VRAM)
 		return -EIO;
 
-	while (offset >= (nodes->size << PAGE_SHIFT)) {
-		offset -= nodes->size << PAGE_SHIFT;
-		++nodes;
-	}
+	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
 	pos = (nodes->start << PAGE_SHIFT) + offset;
 
 	while (len && pos < adev->mc.mc_vram_size) {
-- 
cgit 


From c5795c555bbaca51192ffc6164bb85845ecdf717 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 12 Oct 2017 12:16:33 +0200
Subject: drm/amdgpu: minor CS optimization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We only need to loop over all IBs for old UVD/VCE command stream patching.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 37 +++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 18 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dfd37785563f..52dd78ee8fd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -853,36 +853,37 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_ring *ring = p->job->ring;
-	int i, j, r;
-
-	for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
-
-		struct amdgpu_cs_chunk *chunk;
-		struct amdgpu_ib *ib;
-		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-
-		chunk = &p->chunks[i];
-		ib = &p->job->ibs[j];
-		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
+	int r;
 
-		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
-			continue;
+	/* Only for UVD/VCE VM emulation */
+	if (p->job->ring->funcs->parse_cs) {
+		unsigned i, j;
 
-		if (p->job->ring->funcs->parse_cs) {
+		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 			struct amdgpu_bo_va_mapping *m;
 			struct amdgpu_bo *aobj = NULL;
+			struct amdgpu_cs_chunk *chunk;
+			struct amdgpu_ib *ib;
 			uint64_t offset;
 			uint8_t *kptr;
 
+			chunk = &p->chunks[i];
+			ib = &p->job->ibs[j];
+			chunk_ib = chunk->kdata;
+
+			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+				continue;
+
 			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
-					&aobj, &m);
+						   &aobj, &m);
 			if (r) {
 				DRM_ERROR("IB va_start is invalid\n");
 				return r;
 			}
 
 			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-				(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
 				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
 				return -EINVAL;
 			}
@@ -899,12 +900,12 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
 			amdgpu_bo_kunmap(aobj);
 
-			/* Only for UVD/VCE VM emulation */
 			r = amdgpu_ring_parse_cs(ring, p, j);
 			if (r)
 				return r;
+
+			j++;
 		}
-		j++;
 	}
 
 	if (p->job->vm) {
-- 
cgit 


From c057c11403e461185a24d7f5dc2fbd059bbd1502 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 12 Oct 2017 16:26:34 -0400
Subject: drm/amdgpu: bump version for vram lost counter query (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: vram -> VRAM in comment

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b9a32585f58d..dd2f060d62a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -72,9 +72,10 @@
  * - 3.20.0 - Add support for local BOs
  * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
  * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
+ * - 3.23.0 - Add query for VRAM lost counter
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	22
+#define KMS_DRIVER_MINOR	23
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
cgit 


From 2642cf110d08a403f585a051e4cbf45a90b3adea Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 13 Oct 2017 17:24:31 +0200
Subject: drm/amdgpu: reserve root PD while releasing it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise somebody could try to evict it at the same time and try to use
half torn down structures.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c559d76ff695..010d14195a5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2759,8 +2759,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+	struct amdgpu_bo *root;
 	u64 fault;
-	int i;
+	int i, r;
 
 	/* Clear pending page faults from IH when the VM is destroyed */
 	while (kfifo_get(&vm->faults, &fault))
@@ -2795,7 +2796,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
 	}
 
-	amdgpu_vm_free_levels(&vm->root);
+	root = amdgpu_bo_ref(vm->root.base.bo);
+	r = amdgpu_bo_reserve(root, true);
+	if (r) {
+		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
+	} else {
+		amdgpu_vm_free_levels(&vm->root);
+		amdgpu_bo_unreserve(root);
+	}
+	amdgpu_bo_unref(&root);
 	dma_fence_put(vm->last_update);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		amdgpu_vm_free_reserved_vmid(adev, vm, i);
-- 
cgit 


From c1b24a1405ed8720f5837fc77fbc52fd008cbb42 Mon Sep 17 00:00:00 2001
From: Jérémy Lefaure <jeremy.lefaure@lse.epita.fr>
Date: Sun, 15 Oct 2017 22:29:23 -0400
Subject: drm/amdgpu: use ARRAY_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using the ARRAY_SIZE macro improves the readability of the code.

Found with Coccinelle with the following semantic patch:
@r depends on (org || report)@
type T;
T[] E;
position p;
@@
(
 (sizeof(E)@p /sizeof(*E))
|
 (sizeof(E)@p /sizeof(E[...]))
|
 (sizeof(E)@p /sizeof(T))
)

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Jérémy Lefaure <jeremy.lefaure@lse.epita.fr>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 9 +++++----
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 +++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 51896b7353b6..b8002ac3e536 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -20,6 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+#include <linux/kernel.h>
 #include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
@@ -3952,10 +3953,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
 				unique_indices,
 				&indices_count,
-				sizeof(unique_indices) / sizeof(int),
+				ARRAY_SIZE(unique_indices),
 				indirect_start_offsets,
 				&offset_count,
-				sizeof(indirect_start_offsets)/sizeof(int));
+				ARRAY_SIZE(indirect_start_offsets));
 
 	/* save and restore list */
 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
@@ -3977,14 +3978,14 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
 	/* starting offsets starts */
 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
 		adev->gfx.rlc.starting_offsets_start);
-	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
+	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
 		WREG32(mmRLC_GPM_SCRATCH_DATA,
 				indirect_start_offsets[i]);
 
 	/* unique indices */
 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
-	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
+	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
 		if (unique_indices[i] != 0) {
 			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
 			WREG32(data + i, unique_indices[i] >> 20);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 99a5b3b92e8e..7f15bb2c5233 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -20,6 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+#include <linux/kernel.h>
 #include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
@@ -1730,10 +1731,10 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
 				unique_indirect_regs,
 				&unique_indirect_reg_count,
-				sizeof(unique_indirect_regs)/sizeof(int),
+				ARRAY_SIZE(unique_indirect_regs),
 				indirect_start_offsets,
 				&indirect_start_offsets_count,
-				sizeof(indirect_start_offsets)/sizeof(int));
+				ARRAY_SIZE(indirect_start_offsets));
 
 	/* enable auto inc in case it is disabled */
 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
@@ -1770,12 +1771,12 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
 	/* write the starting offsets to RLC scratch ram */
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
 		adev->gfx.rlc.starting_offsets_start);
-	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
+	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
 			indirect_start_offsets[i]);
 
 	/* load unique indirect regs*/
-	for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) {
+	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
 			unique_indirect_regs[i] & 0x3FFFF);
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
-- 
cgit 


From 6e13bdf6b2d71ab2366a9f87c99d11963aed3bad Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 18 Oct 2017 17:19:42 +0800
Subject: drm/amdgpu: fix regresstion on SR-IOV gpu reset failed

fw ucode is corrupted after vf flr by PSP so ucode_init() is
a must in psp_hw_init othewise KIQ/KCQ enabling will fail

Revert "drm/amdgpu: refine code delete duplicated error handling"
This reverts commit e57b87ff828f95efe992468e6d18c2c059b27aa9.
Revert "drm/amdgpu: move amdgpu_ucode_init_bo to amdgpu_device.c"
This reverts commit 815b8f8595148d06a64d2ce4282e8e80dfcb02f1.

Reviewed-by: Monk Liu <monk.liu@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 8 --------
 drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c       | 9 +++++++++
 3 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9cdaba4af216..0731b4f9b25c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1679,7 +1679,6 @@ static int amdgpu_init(struct amdgpu_device *adev)
 			return r;
 		}
 		adev->ip_blocks[i].status.sw = true;
-
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			r = amdgpu_vram_scratch_init(adev);
@@ -1710,11 +1709,6 @@ static int amdgpu_init(struct amdgpu_device *adev)
 		}
 	}
 
-	mutex_lock(&adev->firmware.mutex);
-	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
-		amdgpu_ucode_init_bo(adev);
-	mutex_unlock(&adev->firmware.mutex);
-
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
@@ -1850,8 +1844,6 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 
 		adev->ip_blocks[i].status.hw = false;
 	}
-	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
-		amdgpu_ucode_fini_bo(adev);
 
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.sw)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 3b42f407971d..5f5aa5fddc16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -145,6 +145,8 @@ static int amdgpu_pp_hw_init(void *handle)
 	int ret = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
+		amdgpu_ucode_init_bo(adev);
 
 	if (adev->powerplay.ip_funcs->hw_init)
 		ret = adev->powerplay.ip_funcs->hw_init(
@@ -162,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle)
 		ret = adev->powerplay.ip_funcs->hw_fini(
 					adev->powerplay.pp_handle);
 
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
+		amdgpu_ucode_fini_bo(adev);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index f1035a689d35..447d446b5015 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -411,6 +411,13 @@ static int psp_hw_init(void *handle)
 		return 0;
 
 	mutex_lock(&adev->firmware.mutex);
+	/*
+	 * This sequence is just used on hw_init only once, no need on
+	 * resume.
+	 */
+	ret = amdgpu_ucode_init_bo(adev);
+	if (ret)
+		goto failed;
 
 	ret = psp_load_fw(adev);
 	if (ret) {
@@ -435,6 +442,8 @@ static int psp_hw_fini(void *handle)
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		return 0;
 
+	amdgpu_ucode_fini_bo(adev);
+
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
 	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
-- 
cgit 


From 896a664c969307a4954cd8c7754fd5d4e5358396 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Tue, 17 Oct 2017 19:23:42 +0800
Subject: drm/amdgpu:reduce wb to 512 slot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

with current WB usage we only use 57 slots, so 512
is extreamly sufficient, and reduce to 512 can
make WB fit into one page.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 67b864436be1..b8ba1f5ae5e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1151,7 +1151,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 1024	/* Reserve at most 1024 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 512	/* Reserve at most 512 WB slots for amdgpu-owned rings. */
 
 struct amdgpu_wb {
 	struct amdgpu_bo	*wb_obj;
-- 
cgit 


From c70b78a71e9a283240f72dfdfff8fd2388db51da Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Mon, 16 Oct 2017 20:02:08 +0800
Subject: drm/amdgpu:fix duplicated setting job's vram_lost
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 52dd78ee8fd0..32cf83e2f2d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -172,7 +172,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;
 
-	p->job->vram_lost_counter = atomic_read(&p->adev->vram_lost_counter);
 	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
 		ret = -ECANCELED;
 		goto free_all_kdata;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index a8357885776e..0cfc68db575b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -61,11 +61,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
-	(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 
 	amdgpu_sync_create(&(*job)->sync);
 	amdgpu_sync_create(&(*job)->dep_sync);
 	amdgpu_sync_create(&(*job)->sched_sync);
+	(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 
 	return 0;
 }
-- 
cgit 


From f993d628a2d3cb5e0a82a5284b24cef745f42b41 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Mon, 16 Oct 2017 19:46:01 +0800
Subject: drm/amdgpu:don't check soft_reset for sriov
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Ack-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0731b4f9b25c..3a6ce6386ad0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2625,6 +2625,9 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
 	int i;
 	bool asic_hang = false;
 
+	if (amdgpu_sriov_vf(adev))
+		return true;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
-- 
cgit 


From e71de0766191d32648cf12dfb2f53f05e52b2dcc Mon Sep 17 00:00:00 2001
From: pding <Pixel.Ding@amd.com>
Date: Thu, 12 Oct 2017 13:53:20 +0800
Subject: drm/amdgpu: report more amdgpu_fence_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only for GFX ring. This can help checking MCBP feature.

The fence at the end of the frame will indicate the completion status.
If the frame completed normally, the fence is written to the address
given in the EVENT_WRITE_EOP packet. If preemption occurred in the
previous IB the address is adjusted by 2 DWs. If work submitted in the
frame was reset before completion, the fence address is adjusted by
four DWs. In the case that preemption occurred, and before preemption
completed a reset was initiated, the address will be adjusted with six
DWs

Signed-off-by: pding <Pixel.Ding@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 333bad749067..7bdedd788f5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -641,6 +641,19 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 			   atomic_read(&ring->fence_drv.last_seq));
 		seq_printf(m, "Last emitted        0x%08x\n",
 			   ring->fence_drv.sync_seq);
+
+		if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+			continue;
+
+		/* set in CP_VMID_PREEMPT and preemption occurred */
+		seq_printf(m, "Last preempted      0x%08x\n",
+			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
+		/* set in CP_VMID_RESET and reset occurred */
+		seq_printf(m, "Last reset          0x%08x\n",
+			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
+		/* Both preemption and reset occurred */
+		seq_printf(m, "Last both           0x%08x\n",
+			   le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
 	}
 	return 0;
 }
-- 
cgit 


From 43ca8efa46d9b1c4defa1b27c4dd1ef3866aaad9 Mon Sep 17 00:00:00 2001
From: pding <Pixel.Ding@amd.com>
Date: Fri, 13 Oct 2017 15:38:35 +0800
Subject: drm/amdgpu: busywait KIQ register accessing (v4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Register accessing is performed when IRQ is disabled. Never sleep in
this function.

Known issue: dead sleep in many use cases of index/data registers.

v2:
 - wrap polling fence functions.
 - don't trigger IRQ for polling in case of wrongly fence signal.

v3:
 - handle wrap round gracefully.
 - add comments for polling function

v4:
 - don't return negative timeout confused with error code

Signed-off-by: pding <Pixel.Ding@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  8 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 50 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 30 ++++++++----------
 6 files changed, 71 insertions(+), 25 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b8ba1f5ae5e7..cbcb6a153aba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -879,7 +879,7 @@ struct amdgpu_mec {
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
-	struct mutex		ring_mutex;
+	spinlock_t              ring_lock;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3a6ce6386ad0..0b9332e65a4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 {
 	uint32_t ret;
 
-	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
-		BUG_ON(in_interrupt());
+	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 		return amdgpu_virt_kiq_rreg(adev, reg);
-	}
 
 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		adev->last_mm_index = v;
 	}
 
-	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
-		BUG_ON(in_interrupt());
+	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 		return amdgpu_virt_kiq_wreg(adev, reg, v);
-	}
 
 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7bdedd788f5a..fb9f88ef6059 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -168,6 +168,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
 	return 0;
 }
 
+/**
+ * amdgpu_fence_emit_polling - emit a fence on the requeste ring
+ *
+ * @ring: ring the fence is associated with
+ * @s: resulting sequence number
+ *
+ * Emits a fence command on the requested ring (all asics).
+ * Used For polling fence.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
+{
+	uint32_t seq;
+
+	if (!s)
+		return -EINVAL;
+
+	seq = ++ring->fence_drv.sync_seq;
+	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+			       seq, AMDGPU_FENCE_FLAG_INT);
+
+	*s = seq;
+
+	return 0;
+}
+
 /**
  * amdgpu_fence_schedule_fallback - schedule fallback check
  *
@@ -281,6 +307,30 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 	return r;
 }
 
+/**
+ * amdgpu_fence_wait_polling - busy wait for givn sequence number
+ *
+ * @ring: ring index the fence is associated with
+ * @wait_seq: sequence number to wait
+ * @timeout: the timeout for waiting in usecs
+ *
+ * Wait for all fences on the requested ring to signal (all asics).
+ * Returns left time if no timeout, 0 or minus if timeout.
+ */
+signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
+				      uint32_t wait_seq,
+				      signed long timeout)
+{
+	uint32_t seq;
+
+	do {
+		seq = amdgpu_fence_read(ring);
+		udelay(5);
+		timeout -= 5;
+	} while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);
+
+	return timeout > 0 ? timeout : 0;
+}
 /**
  * amdgpu_fence_count_emitted - get the count of emitted fences
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 83435ccbad44..ef043361009f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -201,7 +201,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	int r = 0;
 
-	mutex_init(&kiq->ring_mutex);
+	spin_lock_init(&kiq->ring_lock);
 
 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 0d9ce141404c..b18c2b96691f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -90,8 +90,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
 void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
 int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
+signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
+				      uint32_t wait_seq,
+				      signed long timeout);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index ed7be2eb24b0..e97f80f86005 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,7 +22,7 @@
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT	100000
+#define MAX_KIQ_REG_WAIT	100000000 /* in usecs */
 
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
 {
@@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
 	signed long r;
-	uint32_t val;
-	struct dma_fence *f;
+	uint32_t val, seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
 
 	BUG_ON(!ring->funcs->emit_rreg);
 
-	mutex_lock(&kiq->ring_mutex);
+	spin_lock(&kiq->ring_lock);
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_emit_rreg(ring, reg);
-	amdgpu_fence_emit(ring, &f);
+	amdgpu_fence_emit_polling(ring, &seq);
 	amdgpu_ring_commit(ring);
-	mutex_unlock(&kiq->ring_mutex);
+	spin_unlock(&kiq->ring_lock);
 
-	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
-	dma_fence_put(f);
+	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		DRM_ERROR("wait for kiq fence error: %ld\n", r);
 		return ~0;
 	}
-
 	val = adev->wb.wb[adev->virt.reg_val_offs];
 
 	return val;
@@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 {
 	signed long r;
-	struct dma_fence *f;
+	uint32_t seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
 
 	BUG_ON(!ring->funcs->emit_wreg);
 
-	mutex_lock(&kiq->ring_mutex);
+	spin_lock(&kiq->ring_lock);
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_emit_wreg(ring, reg, v);
-	amdgpu_fence_emit(ring, &f);
+	amdgpu_fence_emit_polling(ring, &seq);
 	amdgpu_ring_commit(ring);
-	mutex_unlock(&kiq->ring_mutex);
+	spin_unlock(&kiq->ring_lock);
 
-	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
+	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 	if (r < 1)
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-	dma_fence_put(f);
+		DRM_ERROR("wait for kiq fence error: %ld\n", r);
 }
 
 /**
-- 
cgit 


From 4b6b691ee38abae8842aed61d442dfb315c45789 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 16 Oct 2017 10:32:04 +0200
Subject: drm/amdgpu: linear validate first then bind to GART
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For VM emulation for old UVD/VCE we need to validate the BO with linear
VRAM flag set first and then eventually bind it to GART.

Validating with linear VRAM flag set can move the BO to GART making
UVD/VCE read/write from an unbound GART BO.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
CC: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 32cf83e2f2d9..f7fceb63413c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1582,14 +1582,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
 		return -EINVAL;
 
-	r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
-	if (unlikely(r))
-		return r;
-
-	if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-		return 0;
+	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
+		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false,
+				    false);
+		if (r)
+			return r;
+	}
 
-	(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-	amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
-	return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false);
+	return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
 }
-- 
cgit 


From d2d7cc330dc1f8ea7cf29dfb7d3ad8bccdcde597 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 16 Oct 2017 11:18:54 +0200
Subject: drm/amdgpu: allow GTT overcommit during bind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While binding BOs to GART we need to allow a bit overcommit in the GTT
domain. Otherwise we can never use the full GART space when GART size=GTT size.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 0d15eb7d31d7..33535d347734 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -169,7 +169,8 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
 	int r;
 
 	spin_lock(&mgr->lock);
-	if (atomic64_read(&mgr->available) < mem->num_pages) {
+	if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
+	    atomic64_read(&mgr->available) < mem->num_pages) {
 		spin_unlock(&mgr->lock);
 		return 0;
 	}
@@ -244,8 +245,9 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man,
 uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man)
 {
 	struct amdgpu_gtt_mgr *mgr = man->priv;
+	s64 result = man->size - atomic64_read(&mgr->available);
 
-	return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE;
+	return (result > 0 ? result : 0) * PAGE_SIZE;
 }
 
 /**
@@ -265,7 +267,7 @@ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man,
 	drm_mm_print(&mgr->mm, printer);
 	spin_unlock(&mgr->lock);
 
-	drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
+	drm_printf(printer, "man size:%llu pages, gtt available:%lld pages, usage:%lluMB\n",
 		   man->size, (u64)atomic64_read(&mgr->available),
 		   amdgpu_gtt_mgr_usage(man) >> 20);
 }
-- 
cgit 


From 91fe77eb9585b45928b93cfe402248c77c28dab2 Mon Sep 17 00:00:00 2001
From: pding <Pixel.Ding@amd.com>
Date: Thu, 19 Oct 2017 09:38:39 +0800
Subject: drm/amdgpu: merge bios post checking functions

Merge the post checking functions to avoid confusion and take
virtualization into account in all cases.

Signed-off-by: pding <Pixel.Ding@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 41 +++++++++++++-----------------
 1 file changed, 18 insertions(+), 23 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0b9332e65a4c..8cf58911b17b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -744,27 +744,6 @@ bool amdgpu_need_post(struct amdgpu_device *adev)
 {
 	uint32_t reg;
 
-	if (adev->has_hw_reset) {
-		adev->has_hw_reset = false;
-		return true;
-	}
-
-	/* bios scratch used on CIK+ */
-	if (adev->asic_type >= CHIP_BONAIRE)
-		return amdgpu_atombios_scratch_need_asic_init(adev);
-
-	/* check MEM_SIZE for older asics */
-	reg = amdgpu_asic_get_config_memsize(adev);
-
-	if ((reg != 0) && (reg != 0xffffffff))
-		return false;
-
-	return true;
-
-}
-
-static bool amdgpu_vpost_needed(struct amdgpu_device *adev)
-{
 	if (amdgpu_sriov_vf(adev))
 		return false;
 
@@ -787,7 +766,23 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev)
 				return true;
 		}
 	}
-	return amdgpu_need_post(adev);
+
+	if (adev->has_hw_reset) {
+		adev->has_hw_reset = false;
+		return true;
+	}
+
+	/* bios scratch used on CIK+ */
+	if (adev->asic_type >= CHIP_BONAIRE)
+		return amdgpu_atombios_scratch_need_asic_init(adev);
+
+	/* check MEM_SIZE for older asics */
+	reg = amdgpu_asic_get_config_memsize(adev);
+
+	if ((reg != 0) && (reg != 0xffffffff))
+		return false;
+
+	return true;
 }
 
 /**
@@ -2208,7 +2203,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	amdgpu_device_detect_sriov_bios(adev);
 
 	/* Post card if necessary */
-	if (amdgpu_vpost_needed(adev)) {
+	if (amdgpu_need_post(adev)) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "no vBIOS found\n");
 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
-- 
cgit 


From ef4c166dc2335bf20c7a854a0de52d17ed9f2f38 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Fri, 22 Sep 2017 16:23:34 +0800
Subject: drm/amdgpu/sriov:now must reinit psp

otherwise after VF FLR the KIQ cannot work

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8cf58911b17b..07726afd9307 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1946,6 +1946,7 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
 
 	static enum amd_ip_block_type ip_order[] = {
 		AMD_IP_BLOCK_TYPE_SMC,
+		AMD_IP_BLOCK_TYPE_PSP,
 		AMD_IP_BLOCK_TYPE_DCE,
 		AMD_IP_BLOCK_TYPE_GFX,
 		AMD_IP_BLOCK_TYPE_SDMA,
-- 
cgit 


From 6867e1b5fbd1a9deaf95a1bd23ea930063c8d216 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Mon, 16 Oct 2017 19:50:44 +0800
Subject: drm/amdgpu:fix vf_error_put

1,it should not work on non-SR-IOV case
2,the NO_VBIOS error is incorrect, should
handle it under detect_sriov_bios.
3,wrap the whole detect_sriov_bios with sriov check

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 21 +++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c |  7 ++++++-
 2 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 07726afd9307..e29731c4ada7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2032,12 +2032,17 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 
 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 {
-	if (adev->is_atom_fw) {
-		if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
-			adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
-	} else {
-		if (amdgpu_atombios_has_gpu_virtualization_table(adev))
-			adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+	if (amdgpu_sriov_vf(adev)) {
+		if (adev->is_atom_fw) {
+			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
+				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+		} else {
+			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
+				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+		}
+
+		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
 	}
 }
 
@@ -2207,7 +2212,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (amdgpu_need_post(adev)) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "no vBIOS found\n");
-			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
 			r = -EINVAL;
 			goto failed;
 		}
@@ -2215,7 +2219,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
 		if (r) {
 			dev_err(adev->dev, "gpu post error!\n");
-			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
 			goto failed;
 		}
 	} else {
@@ -3019,7 +3022,6 @@ out:
 		}
 	} else {
 		dev_err(adev->dev, "asic resume failed (%d).\n", r);
-		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			if (adev->rings[i] && adev->rings[i]->sched.thread) {
 				kthread_unpark(adev->rings[i]->sched.thread);
@@ -3033,7 +3035,6 @@ out:
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
-		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
 	}
 	else {
 		dev_info(adev->dev, "GPU reset successed!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
index 746b81339835..7f7097931c6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
@@ -31,7 +31,12 @@ void amdgpu_vf_error_put(struct amdgpu_device *adev,
 			 uint64_t error_data)
 {
 	int index;
-	uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
+	uint16_t error_code;
+
+	if (!amdgpu_sriov_vf(adev))
+		return;
+
+	error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
 
 	mutex_lock(&adev->virt.vf_errors.lock);
 	index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
-- 
cgit 


From 63ae07ca4fb4adcacc19272b05b5c3d398e94a56 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Tue, 17 Oct 2017 19:18:56 +0800
Subject: drm/amdgpu:fix wb_clear

Properly shift the index when clearing so we clear
the right bit

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e29731c4ada7..efcacb827de7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -546,7 +546,7 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb)
 
 	if (offset < adev->wb.num_wb) {
 		__set_bit(offset, adev->wb.used);
-		*wb = offset * 8; /* convert to dw offset */
+		*wb = offset << 3; /* convert to dw offset */
 		return 0;
 	} else {
 		return -EINVAL;
@@ -564,7 +564,7 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb)
 void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb)
 {
 	if (wb < adev->wb.num_wb)
-		__clear_bit(wb, adev->wb.used);
+		__clear_bit(wb >> 3, adev->wb.used);
 }
 
 /**
-- 
cgit 


From d3daa2c7865cbfa830651b11c8ad1df23465b46e Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Mon, 23 Oct 2017 11:27:35 -0400
Subject: drm/amd/amdgpu: Remove workaround check for UVD6 on APUs

On APUs the uvd6 driver was skipping proper suspend/resume routines resulting
in a broken state upon resume.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 31db356476f8..430a6b4dfac9 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -225,11 +225,7 @@ static int uvd_v6_0_suspend(void *handle)
 	if (r)
 		return r;
 
-	/* Skip this for APU for now */
-	if (!(adev->flags & AMD_IS_APU))
-		r = amdgpu_uvd_suspend(adev);
-
-	return r;
+	return amdgpu_uvd_suspend(adev);
 }
 
 static int uvd_v6_0_resume(void *handle)
@@ -237,12 +233,10 @@ static int uvd_v6_0_resume(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	/* Skip this for APU for now */
-	if (!(adev->flags & AMD_IS_APU)) {
-		r = amdgpu_uvd_resume(adev);
-		if (r)
-			return r;
-	}
+	r = amdgpu_uvd_resume(adev);
+	if (r)
+		return r;
+
 	return uvd_v6_0_hw_init(adev);
 }
 
-- 
cgit 


From 4694257e7d4757fedaa8dcd18bb5ff52e76a2765 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Mon, 16 Oct 2017 16:51:28 +0800
Subject: drm/amdgpu/psp: prevent page fault by checking write_frame
 address(v4)

 - Prevent a possible buffer overflow when updating the ring buffer by
    bounds checking the command frame against the available space in the
    ring buffer.

 v2: update the ring_buffer_end address
 v3: update the commit log
 v4: squash in print fix (Michel)

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 14 ++++++++++++--
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c  | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index dea7c909ca5f..4e20d91d5d50 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -257,6 +257,9 @@ int psp_v10_0_cmd_submit(struct psp_context *psp,
 	unsigned int psp_write_ptr_reg = 0;
 	struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
 	struct psp_ring *ring = &psp->km_ring;
+	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
+	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
+		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
 	struct amdgpu_device *adev = psp->adev;
 	uint32_t ring_size_dw = ring->ring_size / 4;
 	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
@@ -266,9 +269,16 @@ int psp_v10_0_cmd_submit(struct psp_context *psp,
 
 	/* Update KM RB frame pointer to new frame */
 	if ((psp_write_ptr_reg % ring_size_dw) == 0)
-		write_frame = ring->ring_mem;
+		write_frame = ring_buffer_start;
 	else
-		write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw);
+		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
+	/* Check invalid write_frame ptr address */
+	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
+		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+			  ring_buffer_start, ring_buffer_end, write_frame);
+		DRM_ERROR("write_frame is pointing to address out of bounds\n");
+		return -EINVAL;
+	}
 
 	/* Initialize KM RB frame */
 	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index cee5c396b277..c7bcfe8e286c 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -367,6 +367,9 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
 	unsigned int psp_write_ptr_reg = 0;
 	struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
 	struct psp_ring *ring = &psp->km_ring;
+	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
+	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
+		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
 	struct amdgpu_device *adev = psp->adev;
 	uint32_t ring_size_dw = ring->ring_size / 4;
 	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
@@ -378,9 +381,16 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
 	/* write_frame ptr increments by size of rb_frame in bytes */
 	/* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
 	if ((psp_write_ptr_reg % ring_size_dw) == 0)
-		write_frame = ring->ring_mem;
+		write_frame = ring_buffer_start;
 	else
-		write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw);
+		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
+	/* Check invalid write_frame ptr address */
+	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
+		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+			  ring_buffer_start, ring_buffer_end, write_frame);
+		DRM_ERROR("write_frame is pointing to address out of bounds\n");
+		return -EINVAL;
+	}
 
 	/* Initialize KM RB frame */
 	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-- 
cgit 


From ec8c9f8be875c48c391a1355255867a2d70d1ee2 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 16 Oct 2017 13:47:15 +0200
Subject: drm/amdgpu: minor cleanup for amdgpu_ttm_bind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Filter the placement mask before using it. In theory it could be that we
have other flags set here as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 51eacefadea1..b577b717caa0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -909,7 +909,8 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
 	placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
-	placements.flags = bo->mem.placement | TTM_PL_FLAG_TT;
+	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
+		TTM_PL_FLAG_TT;
 
 	r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
 	if (unlikely(r))
-- 
cgit 


From fa2cd036925ffea8ef4f2826734ccfa1b0528141 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 16 Oct 2017 17:37:06 +0200
Subject: drm/amdgpu: don't flush the TLB before initializing GART
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No point in doing this.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index f4370081f6e6..fe818501c520 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -332,12 +332,13 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		adev->gart.pages[p] = pagelist[i];
 #endif
 
-	if (adev->gart.ptr) {
-		r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-			    adev->gart.ptr);
-		if (r)
-			return r;
-	}
+	if (!adev->gart.ptr)
+		return 0;
+
+	r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+		    adev->gart.ptr);
+	if (r)
+		return r;
 
 	mb();
 	amdgpu_gart_flush_gpu_tlb(adev, 0);
-- 
cgit 


From 4a0144bfc6df1f7a89a04a3e8662f1f4375eb1fe Mon Sep 17 00:00:00 2001
From: Tom St Denis <tom.stdenis@amd.com>
Date: Tue, 24 Oct 2017 12:07:12 -0400
Subject: drm/amd/amdgpu: Remove workaround for suspend/resume in uvd7

The workaround is not required anymor and would result in
hangs during suspend/resume cycles if the uvd block were busy.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Acked-by: Leo Liu <leo.liu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index b8ed8faf2003..6634545060fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -592,11 +592,7 @@ static int uvd_v7_0_suspend(void *handle)
 	if (r)
 		return r;
 
-	/* Skip this for APU for now */
-	if (!(adev->flags & AMD_IS_APU))
-		r = amdgpu_uvd_suspend(adev);
-
-	return r;
+	return amdgpu_uvd_suspend(adev);
 }
 
 static int uvd_v7_0_resume(void *handle)
@@ -604,12 +600,10 @@ static int uvd_v7_0_resume(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	/* Skip this for APU for now */
-	if (!(adev->flags & AMD_IS_APU)) {
-		r = amdgpu_uvd_resume(adev);
-		if (r)
-			return r;
-	}
+	r = amdgpu_uvd_resume(adev);
+	if (r)
+		return r;
+
 	return uvd_v7_0_hw_init(adev);
 }
 
-- 
cgit 


From a695e43712242c354748e9bae5d137d4337a7694 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 31 Oct 2017 09:36:13 +0100
Subject: drm/amdgpu: fix error handling in amdgpu_bo_do_create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bo structure is freed up in case of an error, so we can't do any
accounting if that happens.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
CC: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8b4ed8a98a18..ea25164e7f4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
 				 &bo->placement, page_align, !kernel, NULL,
 				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
+	if (unlikely(r != 0))
+		return r;
+
 	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
 		      initial_bytes_moved;
 	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
@@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	else
 		amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
 
-	if (unlikely(r != 0))
-		return r;
-
 	if (kernel)
 		bo->tbo.priority = 1;
 
-- 
cgit 


From 4f626a4ac8f57ddabf06d03870adab91e463217f Mon Sep 17 00:00:00 2001
From: Roman Kapl <rka@sysgo.com>
Date: Mon, 30 Oct 2017 11:56:13 +0100
Subject: drm/radeon: fix atombios on big endian

The function for byteswapping the data send to/from atombios was buggy for
num_bytes not divisible by four. The function must be aware of the fact
that after byte-swapping the u32 units, valid bytes might end up after the
num_bytes boundary.

This patch was tested on kernel 3.12 and allowed us to sucesfully use
DisplayPort on and Radeon SI card. Namely it fixed the link training and
EDID readout.

The function is patched both in radeon and amd drivers, since the functions
and the fixes are identical.

Signed-off-by: Roman Kapl <rka@sysgo.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 38 +++++++++++++---------------
 1 file changed, 18 insertions(+), 20 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index f66d33e4baca..f450b69323fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1766,34 +1766,32 @@ bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev)
 		return true;
 }
 
-/* Atom needs data in little endian format
- * so swap as appropriate when copying data to
- * or from atom. Note that atom operates on
- * dw units.
+/* Atom needs data in little endian format so swap as appropriate when copying
+ * data to or from atom. Note that atom operates on dw units.
+ *
+ * Use to_le=true when sending data to atom and provide at least
+ * ALIGN(num_bytes,4) bytes in the dst buffer.
+ *
+ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
+ * byes in the src buffer.
  */
 void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
 {
 #ifdef __BIG_ENDIAN
-	u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
-	u32 *dst32, *src32;
+	u32 src_tmp[5], dst_tmp[5];
 	int i;
+	u8 align_num_bytes = ALIGN(num_bytes, 4);
 
-	memcpy(src_tmp, src, num_bytes);
-	src32 = (u32 *)src_tmp;
-	dst32 = (u32 *)dst_tmp;
 	if (to_le) {
-		for (i = 0; i < ((num_bytes + 3) / 4); i++)
-			dst32[i] = cpu_to_le32(src32[i]);
-		memcpy(dst, dst_tmp, num_bytes);
+		memcpy(src_tmp, src, num_bytes);
+		for (i = 0; i < align_num_bytes / 4; i++)
+			dst_tmp[i] = cpu_to_le32(src_tmp[i]);
+		memcpy(dst, dst_tmp, align_num_bytes);
 	} else {
-		u8 dws = num_bytes & ~3;
-		for (i = 0; i < ((num_bytes + 3) / 4); i++)
-			dst32[i] = le32_to_cpu(src32[i]);
-		memcpy(dst, dst_tmp, dws);
-		if (num_bytes % 4) {
-			for (i = 0; i < (num_bytes % 4); i++)
-				dst[dws+i] = dst_tmp[dws+i];
-		}
+		memcpy(src_tmp, src, align_num_bytes);
+		for (i = 0; i < align_num_bytes / 4; i++)
+			dst_tmp[i] = le32_to_cpu(src_tmp[i]);
+		memcpy(dst, dst_tmp, num_bytes);
 	}
 #else
 	memcpy(dst, src, num_bytes);
-- 
cgit 


From 24738d7c8750784e5660bd200f0cc2643499417a Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Tue, 31 Oct 2017 19:45:11 +0800
Subject: drm/amdgpu:add fw-vram-usage for atomfirmware

otherwise PF & VF exchange is broken

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index f9ffe8ef0cd6..ff8efd0f8fd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -71,19 +71,33 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
 	struct atom_context *ctx = adev->mode_info.atom_context;
 	int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
 						vram_usagebyfirmware);
+	struct vram_usagebyfirmware_v2_1 *	firmware_usage;
+	uint32_t start_addr, size;
 	uint16_t data_offset;
 	int usage_bytes = 0;
 
 	if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
-		struct vram_usagebyfirmware_v2_1 *firmware_usage =
-			(struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
-
+		firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
 		DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n",
 			  le32_to_cpu(firmware_usage->start_address_in_kb),
 			  le16_to_cpu(firmware_usage->used_by_firmware_in_kb),
 			  le16_to_cpu(firmware_usage->used_by_driver_in_kb));
 
-		usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) * 1024;
+		start_addr = le32_to_cpu(firmware_usage->start_address_in_kb);
+		size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb);
+
+		if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+			(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+			ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+			/* Firmware request VRAM reservation for SR-IOV */
+			adev->fw_vram_usage.start_offset = (start_addr &
+				(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+			adev->fw_vram_usage.size = size << 10;
+			/* Use the default scratch size */
+			usage_bytes = 0;
+		} else {
+			usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10;
+		}
 	}
 	ctx->scratch_size_bytes = 0;
 	if (usage_bytes == 0)
-- 
cgit 


From e477e940dad1836c6f6d23353e424665b9316b6e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 2 Nov 2017 12:25:39 +0100
Subject: drm/amdgpu/virt: don't dereference undefined 'module' struct

Accessing the THIS_MODULE directly is only possible when modules
are enabled, otherwise we get a build failure:

drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c: In function 'amdgpu_virt_init_data_exchange':
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c:331:20: error: dereferencing pointer to incomplete type 'struct module'

Further, THIS_MODULE is NULL when the driver is built-in, so the
code would likely cause a NULL pointer dereference.

This adds an #ifdef check to avoid the compile-time error, plus
a NULL pointer check before dereferencing THIS_MODULE. It might
be better to find a way to avoid using the module version
altogether.

Fixes: 2dc8f81e4f82 ("drm/amdgpu: SR-IOV data exchange between PF&VF")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Reviewed-By: Xiangliang Yu <Xiangliang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e97f80f86005..4e4a476593e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -328,9 +328,11 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 					sizeof(amdgim_vf2pf_info));
 				AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version,
 					&str);
+#ifdef MODULE
 				if (THIS_MODULE->version != NULL)
 					strcpy(str, THIS_MODULE->version);
 				else
+#endif
 					strcpy(str, "N/A");
 				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert,
 					0);
-- 
cgit 


From 37c5f2c99adf63adf13f1ca309a1ffce25c5589a Mon Sep 17 00:00:00 2001
From: Akshu Agrawal <akshu.agrawal@amd.com>
Date: Mon, 18 Sep 2017 12:26:07 +0530
Subject: drm/amd/amdgpu: Enabling ACP clock in hw_init (v2)

Enabling of ACP in hw_init does away with requirement of order
of probe on designware_i2s and acp dma driver. designware_i2s
reads i2s registers and this use to fail if acp dma driver was not probed
prior to it.

BUG=:b:62103837
TEST=modprobe snd-soc-acp-pcm
modprobe snd-soc-acp-rt5645-mach
aplay -l
**** List of PLAYBACK Hardware Devices ****
card 0: acprt5650 [acprt5650], device 0: RT5645_AIF1 rt5645-aif1-0 []
  Subdevices: 1/1
    Subdevice #0: subdevice #0

v2: use proper device in dev_err to fix warnings (Alex)

Signed-off-by: Akshu Agrawal <akshu.agrawal@amd.com>
Reviewed-on: https://chromium-review.googlesource.com/670207
Reviewed-by: Jason Clinton <jclinton@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/676628
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 159 +++++++++++++++++++++++++-------
 1 file changed, 124 insertions(+), 35 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index a52795d9b458..023bfdb3e63f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -35,41 +35,50 @@
 
 #include "acp_gfx_if.h"
 
-#define ACP_TILE_ON_MASK                0x03
-#define ACP_TILE_OFF_MASK               0x02
-#define ACP_TILE_ON_RETAIN_REG_MASK     0x1f
-#define ACP_TILE_OFF_RETAIN_REG_MASK    0x20
-
-#define ACP_TILE_P1_MASK                0x3e
-#define ACP_TILE_P2_MASK                0x3d
-#define ACP_TILE_DSP0_MASK              0x3b
-#define ACP_TILE_DSP1_MASK              0x37
-
-#define ACP_TILE_DSP2_MASK              0x2f
-
-#define ACP_DMA_REGS_END		0x146c0
-#define ACP_I2S_PLAY_REGS_START		0x14840
-#define ACP_I2S_PLAY_REGS_END		0x148b4
-#define ACP_I2S_CAP_REGS_START		0x148b8
-#define ACP_I2S_CAP_REGS_END		0x1496c
-
-#define ACP_I2S_COMP1_CAP_REG_OFFSET	0xac
-#define ACP_I2S_COMP2_CAP_REG_OFFSET	0xa8
-#define ACP_I2S_COMP1_PLAY_REG_OFFSET	0x6c
-#define ACP_I2S_COMP2_PLAY_REG_OFFSET	0x68
-
-#define mmACP_PGFSM_RETAIN_REG		0x51c9
-#define mmACP_PGFSM_CONFIG_REG		0x51ca
-#define mmACP_PGFSM_READ_REG_0		0x51cc
-
-#define mmACP_MEM_SHUT_DOWN_REQ_LO	0x51f8
-#define mmACP_MEM_SHUT_DOWN_REQ_HI	0x51f9
-#define mmACP_MEM_SHUT_DOWN_STS_LO	0x51fa
-#define mmACP_MEM_SHUT_DOWN_STS_HI	0x51fb
-
-#define ACP_TIMEOUT_LOOP		0x000000FF
-#define ACP_DEVS			3
-#define ACP_SRC_ID			162
+#define ACP_TILE_ON_MASK                	0x03
+#define ACP_TILE_OFF_MASK               	0x02
+#define ACP_TILE_ON_RETAIN_REG_MASK     	0x1f
+#define ACP_TILE_OFF_RETAIN_REG_MASK    	0x20
+
+#define ACP_TILE_P1_MASK                	0x3e
+#define ACP_TILE_P2_MASK                	0x3d
+#define ACP_TILE_DSP0_MASK              	0x3b
+#define ACP_TILE_DSP1_MASK              	0x37
+
+#define ACP_TILE_DSP2_MASK              	0x2f
+
+#define ACP_DMA_REGS_END			0x146c0
+#define ACP_I2S_PLAY_REGS_START			0x14840
+#define ACP_I2S_PLAY_REGS_END			0x148b4
+#define ACP_I2S_CAP_REGS_START			0x148b8
+#define ACP_I2S_CAP_REGS_END			0x1496c
+
+#define ACP_I2S_COMP1_CAP_REG_OFFSET		0xac
+#define ACP_I2S_COMP2_CAP_REG_OFFSET		0xa8
+#define ACP_I2S_COMP1_PLAY_REG_OFFSET		0x6c
+#define ACP_I2S_COMP2_PLAY_REG_OFFSET		0x68
+
+#define mmACP_PGFSM_RETAIN_REG			0x51c9
+#define mmACP_PGFSM_CONFIG_REG			0x51ca
+#define mmACP_PGFSM_READ_REG_0			0x51cc
+
+#define mmACP_MEM_SHUT_DOWN_REQ_LO		0x51f8
+#define mmACP_MEM_SHUT_DOWN_REQ_HI		0x51f9
+#define mmACP_MEM_SHUT_DOWN_STS_LO		0x51fa
+#define mmACP_MEM_SHUT_DOWN_STS_HI		0x51fb
+
+#define mmACP_CONTROL				0x5131
+#define mmACP_STATUS				0x5133
+#define mmACP_SOFT_RESET			0x5134
+#define ACP_CONTROL__ClkEn_MASK 		0x1
+#define ACP_SOFT_RESET__SoftResetAud_MASK 	0x100
+#define ACP_SOFT_RESET__SoftResetAudDone_MASK	0x1000000
+#define ACP_CLOCK_EN_TIME_OUT_VALUE		0x000000FF
+#define ACP_SOFT_RESET_DONE_TIME_OUT_VALUE	0x000000FF
+
+#define ACP_TIMEOUT_LOOP			0x000000FF
+#define ACP_DEVS				3
+#define ACP_SRC_ID				162
 
 enum {
 	ACP_TILE_P1 = 0,
@@ -260,6 +269,8 @@ static int acp_hw_init(void *handle)
 {
 	int r, i;
 	uint64_t acp_base;
+	u32 val = 0;
+	u32 count = 0;
 	struct device *dev;
 	struct i2s_platform_data *i2s_pdata;
 
@@ -400,6 +411,46 @@ static int acp_hw_init(void *handle)
 		}
 	}
 
+	/* Assert Soft reset of ACP */
+	val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+
+	val |= ACP_SOFT_RESET__SoftResetAud_MASK;
+	cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+
+	count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE;
+	while (true) {
+		val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+		if (ACP_SOFT_RESET__SoftResetAudDone_MASK ==
+		    (val & ACP_SOFT_RESET__SoftResetAudDone_MASK))
+			break;
+		if (--count == 0) {
+			dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+			return -ETIMEDOUT;
+		}
+		udelay(100);
+	}
+	/* Enable clock to ACP and wait until the clock is enabled */
+	val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL);
+	val = val | ACP_CONTROL__ClkEn_MASK;
+	cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val);
+
+	count = ACP_CLOCK_EN_TIME_OUT_VALUE;
+
+	while (true) {
+		val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS);
+		if (val & (u32) 0x1)
+			break;
+		if (--count == 0) {
+			dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+			return -ETIMEDOUT;
+		}
+		udelay(100);
+	}
+	/* Deassert the SOFT RESET flags */
+	val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+	val &= ~ACP_SOFT_RESET__SoftResetAud_MASK;
+	cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+
 	return 0;
 }
 
@@ -412,6 +463,8 @@ static int acp_hw_init(void *handle)
 static int acp_hw_fini(void *handle)
 {
 	int i, ret;
+	u32 val = 0;
+	u32 count = 0;
 	struct device *dev;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
@@ -419,6 +472,42 @@ static int acp_hw_fini(void *handle)
 	if (!adev->acp.acp_cell)
 		return 0;
 
+	/* Assert Soft reset of ACP */
+	val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+
+	val |= ACP_SOFT_RESET__SoftResetAud_MASK;
+	cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+
+	count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE;
+	while (true) {
+		val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+		if (ACP_SOFT_RESET__SoftResetAudDone_MASK ==
+		    (val & ACP_SOFT_RESET__SoftResetAudDone_MASK))
+			break;
+		if (--count == 0) {
+			dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+			return -ETIMEDOUT;
+		}
+		udelay(100);
+	}
+	/* Disable ACP clock */
+	val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL);
+	val &= ~ACP_CONTROL__ClkEn_MASK;
+	cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val);
+
+	count = ACP_CLOCK_EN_TIME_OUT_VALUE;
+
+	while (true) {
+		val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS);
+		if (val & (u32) 0x1)
+			break;
+		if (--count == 0) {
+			dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+			return -ETIMEDOUT;
+		}
+		udelay(100);
+	}
+
 	if (adev->acp.acp_genpd) {
 		for (i = 0; i < ACP_DEVS ; i++) {
 			dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
-- 
cgit 


From 7da2e3e09e3186b672822aaade407d17eb35551f Mon Sep 17 00:00:00 2001
From: Roger He <Hongbo.He@amd.com>
Date: Thu, 2 Nov 2017 13:14:27 +0800
Subject: drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b577b717caa0..1f036af85ba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1193,9 +1193,6 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	unsigned long num_pages = bo->mem.num_pages;
 	struct drm_mm_node *node = bo->mem.mm_node;
 
-	if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
-		return ttm_bo_eviction_valuable(bo, place);
-
 	switch (bo->mem.mem_type) {
 	case TTM_PL_TT:
 		return true;
@@ -1210,7 +1207,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 			num_pages -= node->size;
 			++node;
 		}
-		break;
+		return false;
 
 	default:
 		break;
-- 
cgit 


From 40a9960b046290939b56ce8e51f365258f27f264 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 30 Sep 2017 11:13:28 +0300
Subject: drm/amdgpu: potential uninitialized variable in
 amdgpu_vce_ring_parse_cs()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We shifted some code around in commit 9cca0b8e5df0 ("drm/amdgpu: move
amdgpu_cs_sysvm_access_required into find_mapping") and now my static
checker complains that "r" might not be initialized at the end of the
function.  I've reviewed the code, and that seems possible, but it's
also possible I may have missed something.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index b46280c1279f..2918de2f39ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -648,7 +648,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 	uint32_t allocated = 0;
 	uint32_t tmp, handle = 0;
 	uint32_t *size = &tmp;
-	int i, r, idx = 0;
+	int i, r = 0, idx = 0;
 
 	p->job->vm = NULL;
 	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
-- 
cgit 


From 78aa02c713fcf19e9bc8511ab61a5fd6c877cc01 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 30 Sep 2017 11:14:13 +0300
Subject: drm/amdgpu: Potential uninitialized variable in
 amdgpu_vm_update_directories()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After commit ea09729c9302 ("drm/amdgpu: rework page directory filling
v2") then it becomes a lot harder to verify that "r" is initialized.  My
static checker complains and so I've reviewed the code.  It does look
like it might be buggy... Anyway, it doesn't hurt to set "r" to zero
at the start.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 010d14195a5e..c8c26f21993c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1244,7 +1244,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 {
-	int r;
+	int r = 0;
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->relocated)) {
-- 
cgit 


From dce1e131dd4dc68099ff1b70aa03cd2d0acf8639 Mon Sep 17 00:00:00 2001
From: Pixel Ding <Pixel.Ding@amd.com>
Date: Wed, 8 Nov 2017 10:20:01 +0800
Subject: drm/amdgpu: bypass lru touch for KIQ ring submission
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KIQ ring submission is used for register accessing on SRIOV
VF that could happen both in irq enabled and irq disabled cases.
Inversion lock could happen on adev->ring_lru_list_lock, while
this operation is useless and just adds overhead in this use
case.

Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
Reviewed-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index e5ece1fae149..a98fbbb4739f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -136,7 +136,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 	if (ring->funcs->end_use)
 		ring->funcs->end_use(ring);
 
-	amdgpu_ring_lru_touch(ring->adev, ring);
+	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)
+		amdgpu_ring_lru_touch(ring->adev, ring);
 }
 
 /**
-- 
cgit 


From cdd9a8b8599b952e2b39763090689ec2ad8e40c3 Mon Sep 17 00:00:00 2001
From: pding <Pixel.Ding@amd.com>
Date: Tue, 7 Nov 2017 14:32:36 +0800
Subject: drm/amdgpu: use irq-safe lock for kiq->ring_lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This lock is used during register accessing in SRIOV guest.
The register accessing could happen both in irq enabled and
irq disabled cases. Always use irq-safe lock.

Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 4e4a476593e8..6738df836a70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -114,18 +114,19 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
 	signed long r;
+	unsigned long flags;
 	uint32_t val, seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
 
 	BUG_ON(!ring->funcs->emit_rreg);
 
-	spin_lock(&kiq->ring_lock);
+	spin_lock_irqsave(&kiq->ring_lock, flags);
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_emit_rreg(ring, reg);
 	amdgpu_fence_emit_polling(ring, &seq);
 	amdgpu_ring_commit(ring);
-	spin_unlock(&kiq->ring_lock);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 	if (r < 1) {
@@ -140,18 +141,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 {
 	signed long r;
+	unsigned long flags;
 	uint32_t seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
 
 	BUG_ON(!ring->funcs->emit_wreg);
 
-	spin_lock(&kiq->ring_lock);
+	spin_lock_irqsave(&kiq->ring_lock, flags);
 	amdgpu_ring_alloc(ring, 32);
 	amdgpu_ring_emit_wreg(ring, reg, v);
 	amdgpu_fence_emit_polling(ring, &seq);
 	amdgpu_ring_commit(ring);
-	spin_unlock(&kiq->ring_lock);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 	if (r < 1)
-- 
cgit