drm/amdgpu: use ring/hash for fault handling on GMC9 v3

Further testing showed that the idea with the chash doesn't work as expected. Especially we can't predict when we can remove the entries from the hash again. So replace the chash with a ring buffer/hash mix where entries in the container age automatically based on their timestamp. v2: use ring buffer / hash mix v3: check the timeout to make sure all entries age Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> (v2) Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Christian König <christian.koenig@amd.com> 2018-11-07 13:55:01 +0100
committer: Alex Deucher <alexander.deucher@amd.com> 2019-03-19 15:36:58 -0500
commit: c1a8abd99da5a035f54cd7633b060033650ff3e0 (patch)
tree: b0e00e326f07b5b869cdb299fc47840481c42a1c /drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
parent: 8c65fe5fc81c37c98269389759bb1d90c4658953 (diff)
1 files changed, 55 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5a32a0d2ad31..250d9212cc38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -240,3 +240,58 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
 	dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
 			mc->agp_size >> 20, mc->agp_start, mc->agp_end);
 }
+
+/**
+ * amdgpu_gmc_filter_faults - filter VM faults
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ * @timestamp: timestamp of the fault
+ *
+ * Returns:
+ * True if the fault was filtered and should not be processed further.
+ * False if the fault is a new one and needs to be handled.
+ */
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
+			      uint16_t pasid, uint64_t timestamp)
+{
+	struct amdgpu_gmc *gmc = &adev->gmc;
+
+	uint64_t stamp, key = addr << 4 | pasid;
+	struct amdgpu_gmc_fault *fault;
+	uint32_t hash;
+
+	/* If we don't have space left in the ring buffer return immediately */
+	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
+		AMDGPU_GMC_FAULT_TIMEOUT;
+	if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
+		return true;
+
+	/* Try to find the fault in the hash */
+	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+	while (fault->timestamp >= stamp) {
+		uint64_t tmp;
+
+		if (fault->key == key)
+			return true;
+
+		tmp = fault->timestamp;
+		fault = &gmc->fault_ring[fault->next];
+
+		/* Check if the entry was reused */
+		if (fault->timestamp >= tmp)
+			break;
+	}
+
+	/* Add the fault to the ring */
+	fault = &gmc->fault_ring[gmc->last_fault];
+	fault->key = key;
+	fault->timestamp = timestamp;
+
+	/* And update the hash */
+	fault->next = gmc->fault_hash[hash].idx;
+	gmc->fault_hash[hash].idx = gmc->last_fault++;
+	return false;
+}
author	Christian König <christian.koenig@amd.com>	2018-11-07 13:55:01 +0100
committer	Alex Deucher <alexander.deucher@amd.com>	2019-03-19 15:36:58 -0500
commit	c1a8abd99da5a035f54cd7633b060033650ff3e0 (patch)
tree	b0e00e326f07b5b869cdb299fc47840481c42a1c /drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
parent	8c65fe5fc81c37c98269389759bb1d90c4658953 (diff)