Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 514
1 file changed, 342 insertions(+), 172 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 72d9b92b1754..c7843e336310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -39,41 +39,9 @@
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
/*
- * Fences
- * Fences mark an event in the GPUs pipeline and are used
- * for GPU/CPU synchronization. When the fence is written,
- * it is expected that all buffers associated with that fence
- * are no longer in use by the associated ring on the GPU and
- * that the the relevant GPU caches have been flushed.
- */
-
-struct amdgpu_fence {
- struct dma_fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
-};
-
-static struct kmem_cache *amdgpu_fence_slab;
-
-int amdgpu_fence_slab_init(void)
-{
- amdgpu_fence_slab = kmem_cache_create(
- "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!amdgpu_fence_slab)
- return -ENOMEM;
- return 0;
-}
-
-void amdgpu_fence_slab_fini(void)
-{
- rcu_barrier();
- kmem_cache_destroy(amdgpu_fence_slab);
-}
-/*
* Cast helper
*/
static const struct dma_fence_ops amdgpu_fence_ops;
@@ -81,10 +49,7 @@ static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
- if (__f->base.ops == &amdgpu_fence_ops)
- return __f;
-
- return NULL;
+ return __f;
}
/**
@@ -128,33 +93,32 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
- * @f: resulting fence object
+ * @af: amdgpu fence input
* @flags: flags to pass into the subordinate .emit_fence() call
*
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
- unsigned flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_fence *fence;
+ struct dma_fence *fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
- fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
- if (fence == NULL)
- return -ENOMEM;
+ fence = &af->base;
+ af->ring = ring;
seq = ++ring->fence_drv.sync_seq;
- fence->ring = ring;
- dma_fence_init(&fence->base, &amdgpu_fence_ops,
+ dma_fence_init(fence, &amdgpu_fence_ops,
&ring->fence_drv.lock,
- adev->fence_context + ring->idx,
- seq);
+ adev->fence_context + ring->idx, seq);
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
+ amdgpu_fence_save_wptr(af);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -172,12 +136,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
}
}
+ to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
/* This function can't be called concurrently anyway, otherwise
* emitting the fence would mess up the hardware ring buffer.
*/
- rcu_assign_pointer(*ptr, dma_fence_get(&fence->base));
-
- *f = &fence->base;
+ rcu_assign_pointer(*ptr, dma_fence_get(fence));
return 0;
}
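Side note on the emit path above: the fence is published at fences[seq & num_fences_mask], so the slot count (twice the number of hardware submissions) must be a power of two for the mask to work. A minimal standalone C sketch of that indexing, with an arbitrary submission count (illustrative only, not part of the patch):

```c
/*
 * Standalone sketch (not kernel code): how a monotonically increasing
 * 32-bit seqno maps onto the power-of-two fences[] array that
 * amdgpu_fence_emit() indexes with "seq & num_fences_mask".
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t num_hw_submission = 64;		/* must be a power of two */
	const uint32_t num_fences_mask = num_hw_submission * 2 - 1;
	uint32_t seq;

	/* successive seqnos reuse slots once the window of fences wraps */
	for (seq = 124; seq < 132; seq++)
		printf("seq %u -> slot %u\n", seq, seq & num_fences_mask);

	return 0;
}
```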
@@ -246,7 +210,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
struct amdgpu_fence_driver *drv = &ring->fence_drv;
struct amdgpu_device *adev = ring->adev;
uint32_t seq, last_seq;
- int r;
do {
last_seq = atomic_read(&ring->fence_drv.last_seq);
@@ -254,7 +217,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (del_timer(&ring->fence_drv.fallback_timer) &&
+ if (timer_delete(&ring->fence_drv.fallback_timer) &&
seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
@@ -266,6 +229,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
do {
struct dma_fence *fence, **ptr;
+ struct amdgpu_fence *am_fence;
++last_seq;
last_seq &= drv->num_fences_mask;
@@ -278,14 +242,14 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
if (!fence)
continue;
- r = dma_fence_signal(fence);
- if (!r)
- DMA_FENCE_TRACE(fence, "signaled from irq context\n");
- else
- BUG();
-
+ /* Save the wptr in the fence driver so we know what the last processed
+ * wptr was. This is required for re-emitting the ring state for
+ * queues that are reset but are not guilty and thus have no guilty fence.
+ */
+ am_fence = container_of(fence, struct amdgpu_fence, base);
+ drv->signalled_wptr = am_fence->wptr;
+ dma_fence_signal(fence);
dma_fence_put(fence);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
} while (last_seq != seq);
@@ -301,11 +265,13 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
*/
static void amdgpu_fence_fallback(struct timer_list *t)
{
- struct amdgpu_ring *ring = from_timer(ring, t,
- fence_drv.fallback_timer);
+ struct amdgpu_ring *ring = timer_container_of(ring, t,
+ fence_drv.fallback_timer);
if (amdgpu_fence_process(ring))
- DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
+ dev_warn(ring->adev->dev,
+ "Fence fallback timer expired on ring %s\n",
+ ring->name);
}
/**
@@ -353,14 +319,11 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout)
{
- uint32_t seq;
-
- do {
- seq = amdgpu_fence_read(ring);
- udelay(5);
- timeout -= 5;
- } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);
+ while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
+ udelay(2);
+ timeout -= 2;
+ }
return timeout > 0 ? timeout : 0;
}
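The polling loop above keeps spinning while (int32_t)(wait_seq - seq) > 0; the signed cast makes the comparison robust against the 32-bit hardware seqno wrapping around. A standalone sketch of that arithmetic (illustrative only, not part of the patch):

```c
/*
 * Standalone sketch (not kernel code): the wrap-safe sequence comparison
 * used by the polling loop.  The signed difference stays meaningful across
 * a 32-bit rollover as long as the two values are less than 2^31 apart.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool seq_not_reached(uint32_t wait_seq, uint32_t hw_seq)
{
	/* positive difference: the hardware has not reached wait_seq yet */
	return (int32_t)(wait_seq - hw_seq) > 0;
}

int main(void)
{
	printf("%d\n", seq_not_reached(10, 7));           /* 1: still waiting        */
	printf("%d\n", seq_not_reached(2, 0xfffffffeu));  /* 1: waiting across wrap  */
	printf("%d\n", seq_not_reached(0xfffffffeu, 2));  /* 0: already signaled     */
	return 0;
}
```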
/**
@@ -372,14 +335,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
* Returns the number of emitted fences on the ring. Used by the
* dynpm code to ring track activity.
*/
-unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
+unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
uint64_t emitted;
/* We are not protected by ring lock when reading the last sequence
* but it's ok to report slightly wrong fence count here.
*/
- amdgpu_fence_process(ring);
emitted = 0x100000000ull;
emitted -= atomic_read(&ring->fence_drv.last_seq);
emitted += READ_ONCE(ring->fence_drv.sync_seq);
@@ -387,6 +349,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
}
/**
+ * amdgpu_fence_last_unsignaled_time_us - time since the earliest unsignaled fence was emitted
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence that is still unsignaled and calculate the time
+ * delta between when it was emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+ uint32_t last_seq, sync_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+ if (last_seq == sync_seq)
+ return 0;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ fence = drv->fences[last_seq];
+ if (!fence)
+ return 0;
+
+ return ktime_us_delta(ktime_get(),
+ to_amdgpu_fence(fence)->start_timestamp);
+}
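For context on the helper above: it reports the age of the oldest pending fence as ktime_us_delta(ktime_get(), start_timestamp). A rough userspace analog of that calculation, assuming CLOCK_MONOTONIC as the clock (names and the 5 ms sleep are illustrative, not kernel code):

```c
/*
 * Rough userspace analog of the ktime_us_delta() arithmetic: record a start
 * timestamp, then report how many microseconds have elapsed.
 */
#define _POSIX_C_SOURCE 200809L
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static int64_t now_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

int main(void)
{
	int64_t start_timestamp = now_us();	/* stands in for fence->start_timestamp */
	struct timespec delay = { .tv_sec = 0, .tv_nsec = 5 * 1000 * 1000 };

	nanosleep(&delay, NULL);		/* pretend the fence is still pending */
	printf("oldest unsignaled fence age: %lld us\n",
	       (long long)(now_us() - start_timestamp));
	return 0;
}
```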
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * This function is called when the fence and the related IB are about to be
+ * resubmitted to the GPU in the MCBP scenario. Thus we do not need to consider
+ * a race with amdgpu_fence_process() modifying the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+
+ seq &= drv->num_fences_mask;
+ fence = drv->fences[seq];
+ if (!fence)
+ return;
+
+ to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
+/**
* amdgpu_fence_driver_start_ring - make the fence driver
* ready for use on the requested ring.
*
@@ -401,14 +414,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
*/
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
- unsigned irq_type)
+ unsigned int irq_type)
{
struct amdgpu_device *adev = ring->adev;
uint64_t index;
if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
- ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
- ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
+ ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
+ ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
} else {
/* put fence directly behind firmware */
index = ALIGN(adev->uvd.fw->size, 8);
@@ -417,9 +430,6 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
}
amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
- if (irq_src)
- amdgpu_irq_get(adev, irq_src, irq_type);
-
ring->fence_drv.irq_src = irq_src;
ring->fence_drv.irq_type = irq_type;
ring->fence_drv.initialized = true;
@@ -434,24 +444,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
- * @num_hw_submission: number of entries on the hardware queue
- * @sched_score: optional score atomic shared with other schedulers
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
- unsigned num_hw_submission,
- atomic_t *sched_score)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- long timeout;
- int r;
if (!adev)
return -EINVAL;
- if (!is_power_of_2(num_hw_submission))
+ if (!is_power_of_2(ring->num_hw_submission))
return -EINVAL;
ring->fence_drv.cpu_addr = NULL;
@@ -462,46 +466,19 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
- ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
+ ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
spin_lock_init(&ring->fence_drv.lock);
- ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
+ ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
GFP_KERNEL);
+
if (!ring->fence_drv.fences)
return -ENOMEM;
- /* No need to setup the GPU scheduler for rings that don't need it */
- if (ring->no_scheduler)
- return 0;
-
- switch (ring->funcs->type) {
- case AMDGPU_RING_TYPE_GFX:
- timeout = adev->gfx_timeout;
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- timeout = adev->compute_timeout;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- timeout = adev->sdma_timeout;
- break;
- default:
- timeout = adev->video_timeout;
- break;
- }
-
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
- num_hw_submission, amdgpu_job_hang_limit,
- timeout, sched_score, ring->name);
- if (r) {
- DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
- return r;
- }
-
return 0;
}
/**
- * amdgpu_fence_driver_init - init the fence driver
+ * amdgpu_fence_driver_sw_init - init the fence driver
* for all possible rings.
*
* @adev: amdgpu device pointer
@@ -512,20 +489,56 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
* amdgpu_fence_driver_start_ring().
* Returns 0 for success.
*/
-int amdgpu_fence_driver_init(struct amdgpu_device *adev)
+int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
{
return 0;
}
/**
- * amdgpu_fence_driver_fini - tear down the fence driver
+ * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
+ * fence driver interrupts need to be restored.
+ *
+ * @ring: ring to be checked
+ *
+ * Interrupts for rings that belong to GFX IP don't need to be restored
+ * when the target power state is s0ix.
+ *
+ * Return true if the interrupts need to be restored, false otherwise.
+ */
+static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_gfx_power_domain = false;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_SDMA:
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0))
+ is_gfx_power_domain = true;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ case AMDGPU_RING_TYPE_MES:
+ is_gfx_power_domain = true;
+ break;
+ default:
+ break;
+ }
+
+ return !(adev->in_s0ix && is_gfx_power_domain);
+}
+
+/**
+ * amdgpu_fence_driver_hw_fini - tear down the fence driver
* for all possible rings.
*
* @adev: amdgpu device pointer
*
* Tear down the fence driver for all possible rings (all asics).
*/
-void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev)
+void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
{
int i, r;
@@ -534,10 +547,9 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
- if (!ring->no_scheduler)
- drm_sched_fini(&ring->sched);
+
/* You can't wait for HW to signal if it's gone */
- if (!drm_dev_is_unplugged(&adev->ddev))
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)))
r = amdgpu_fence_wait_empty(ring);
else
r = -ENODEV;
@@ -545,92 +557,116 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev)
if (r)
amdgpu_fence_driver_force_completion(ring);
- if (ring->fence_drv.irq_src)
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
+ ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
- del_timer_sync(&ring->fence_drv.fallback_timer);
+ timer_delete_sync(&ring->fence_drv.fallback_timer);
}
}
-void amdgpu_fence_driver_fini_sw(struct amdgpu_device *adev)
+/* Will either stop and flush handlers for the amdgpu interrupt or re-enable it */
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
{
- unsigned int i, j;
+ int i;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->fence_drv.initialized)
+ if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
continue;
- for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
- dma_fence_put(ring->fence_drv.fences[j]);
- kfree(ring->fence_drv.fences);
- ring->fence_drv.fences = NULL;
- ring->fence_drv.initialized = false;
+ if (stop)
+ disable_irq(adev->irq.irq);
+ else
+ enable_irq(adev->irq.irq);
}
}
-/**
- * amdgpu_fence_driver_suspend - suspend the fence driver
- * for all possible rings.
- *
- * @adev: amdgpu device pointer
- *
- * Suspend the fence driver for all possible rings (all asics).
- */
-void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
+void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
{
- int i, r;
+ unsigned int i, j;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
- /* wait for gpu to finish processing current batch */
- r = amdgpu_fence_wait_empty(ring);
- if (r) {
- /* delay GPU reset to resume */
- amdgpu_fence_driver_force_completion(ring);
- }
+ /*
+ * Notice we check for sched.ops since there's some
+ * override on the meaning of sched.ready by amdgpu.
+ * The natural check would be sched.ready, which is
+ * set as drm_sched_init() finishes...
+ */
+ if (ring->sched.ops)
+ drm_sched_fini(&ring->sched);
- /* disable the interrupt */
- if (ring->fence_drv.irq_src)
- amdgpu_irq_put(adev, ring->fence_drv.irq_src,
- ring->fence_drv.irq_type);
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+ dma_fence_put(ring->fence_drv.fences[j]);
+ kfree(ring->fence_drv.fences);
+ ring->fence_drv.fences = NULL;
+ ring->fence_drv.initialized = false;
}
}
/**
- * amdgpu_fence_driver_resume - resume the fence driver
+ * amdgpu_fence_driver_hw_init - enable the fence driver
* for all possible rings.
*
* @adev: amdgpu device pointer
*
- * Resume the fence driver for all possible rings (all asics).
+ * Enable the fence driver for all possible rings (all asics).
* Not all asics have all rings, so each asic will only
* start the fence driver on the rings it has using
* amdgpu_fence_driver_start_ring().
* Returns 0 for success.
*/
-void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
+void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
{
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
/* enable the interrupt */
- if (ring->fence_drv.irq_src)
+ if (ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
}
/**
+ * amdgpu_fence_driver_set_error - set error code on fences
+ * @ring: the ring which contains the fences
+ * @error: the error code to set
+ *
+ * Set an error code to all the fences pending on the ring.
+ */
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&drv->lock, flags);
+ for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
+ struct dma_fence *fence;
+
+ fence = rcu_dereference_protected(drv->fences[i],
+ lockdep_is_held(&drv->lock));
+ if (fence && !dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, error);
+ }
+ spin_unlock_irqrestore(&drv->lock, flags);
+}
+
+/**
* amdgpu_fence_driver_force_completion - force signal latest fence of ring
*
* @ring: fence of the ring to signal
@@ -638,10 +674,125 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
*/
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
{
+ amdgpu_fence_driver_set_error(ring, -ECANCELED);
amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
amdgpu_fence_process(ring);
}
+
+/*
+ * Kernel queue reset handling
+ *
+ * The driver can reset individual queues for most engines, but those queues
+ * may contain work from multiple contexts. Resetting the queue will reset
+ * lose all of that state. In order to minimize the collateral damage, the
+ * driver will save the ring contents which are not associated with the guilty
+ * context prior to resetting the queue. After resetting the queue the queue
+ * contents from the other contexts is re-emitted to the rings so that it can
+ * be processed by the engine. To handle this, we save the queue's write
+ * pointer (wptr) in the fences associated with each context. If we get a
+ * queue timeout, we can then use the wptrs from the fences to determine
+ * which data needs to be saved out of the queue's ring buffer.
+ */
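To make the flow described in this comment concrete, here is a standalone toy model (not part of the patch): each pending fence remembers the ring wptr at emit time, and on a reset the spans that do not belong to the guilty context are the ones worth re-emitting. Every structure, name, and value below is invented for illustration; the real walk over fence_drv.fences[] under RCU follows further down in the patch.

```c
#include <stdint.h>
#include <stdio.h>

/* toy stand-in for the per-fence bookkeeping described above */
struct toy_fence {
	uint64_t context;	/* submitting context */
	uint64_t wptr;		/* ring wptr right after this fence's commands */
};

int main(void)
{
	/* pending (unsignaled) fences in emission order; context 7 is guilty */
	struct toy_fence pending[] = {
		{ .context = 3, .wptr = 0x40 },
		{ .context = 7, .wptr = 0x60 },		/* guilty */
		{ .context = 5, .wptr = 0x90 },
	};
	const uint64_t guilty_context = 7;
	uint64_t span_start = 0x20;	/* last signalled wptr before the hang */
	size_t i;

	for (i = 0; i < sizeof(pending) / sizeof(pending[0]); i++) {
		if (pending[i].context != guilty_context)
			printf("re-emit ring span [0x%llx, 0x%llx)\n",
			       (unsigned long long)span_start,
			       (unsigned long long)pending[i].wptr);
		/* the next span starts where this fence's commands ended */
		span_start = pending[i].wptr;
	}
	return 0;
}
```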
+
+/**
+ * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
+ *
+ * @af: fence of the ring to signal
+ *
+ */
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ struct amdgpu_ring *ring = af->ring;
+ unsigned long flags;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+
+ /* mark all fences from the guilty context with an error */
+ spin_lock_irqsave(&ring->fence_drv.lock, flags);
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ if (fence == af)
+ dma_fence_set_error(&fence->base, -ETIME);
+ else if (fence->context == af->context)
+ dma_fence_set_error(&fence->base, -ECANCELED);
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+ spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
+ /* signal the guilty fence */
+ amdgpu_fence_write(ring, (u32)af->base.seqno);
+ amdgpu_fence_process(ring);
+}
+
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
+{
+ af->wptr = af->ring->wptr;
+}
+
+static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
+ u64 start_wptr, u32 end_wptr)
+{
+ unsigned int first_idx = start_wptr & ring->buf_mask;
+ unsigned int last_idx = end_wptr & ring->buf_mask;
+ unsigned int i;
+
+ /* Backup the contents of the ring buffer. */
+ for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+ ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
+}
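The helper above copies entries between two write pointers by masking the index after each increment, which relies on the ring size being a power of two. A standalone demo of the same walk on an 8-entry buffer (values are invented, not kernel code):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ring[8];			/* power-of-two ring, buf_mask = 7 */
	uint32_t backup[8];
	const uint32_t buf_mask = 7;
	const uint64_t start_wptr = 6;		/* span starts just before the wrap */
	const uint32_t end_wptr = 11;		/* ...and ends after it */
	unsigned int entries = 0;
	unsigned int i;

	for (i = 0; i < 8; i++)
		ring[i] = 0x1000 + i;		/* fake packet dwords */

	/* same loop shape as above: wrap by masking after the increment */
	for (i = start_wptr & buf_mask; i != (end_wptr & buf_mask); ++i, i &= buf_mask)
		backup[entries++] = ring[i];

	printf("backed up %u dwords:", entries);
	for (i = 0; i < entries; i++)
		printf(" 0x%x", backup[i]);
	printf("\n");
	return 0;
}
```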
+
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ u64 wptr;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+ wptr = ring->fence_drv.signalled_wptr;
+ ring->ring_backup_entries_to_copy = 0;
+
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ /* save everything if the ring is not guilty, otherwise
+ * just save the content from other contexts.
+ */
+ if (!guilty_fence || (fence->context != guilty_fence->context))
+ amdgpu_ring_backup_unprocessed_command(ring, wptr,
+ fence->wptr);
+ wptr = fence->wptr;
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+}
+
/*
* Common fence implementation
*/
@@ -653,8 +804,7 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- return (const char *)fence->ring->name;
+ return (const char *)to_amdgpu_fence(f)->ring->name;
}
/**
@@ -667,13 +817,8 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
*/
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- struct amdgpu_ring *ring = fence->ring;
-
- if (!timer_pending(&ring->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(ring);
-
- DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
+ if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
return true;
}
@@ -688,8 +833,9 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
static void amdgpu_fence_free(struct rcu_head *rcu)
{
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- kmem_cache_free(amdgpu_fence_slab, fence);
+
+	/* free the fence if it was allocated separately from a job */
+ kfree(to_amdgpu_fence(f));
}
/**
@@ -718,11 +864,12 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct amdgpu_device *adev = m->private;
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
@@ -775,9 +922,11 @@ static int gpu_recover_get(void *data, u64 *val)
return 0;
}
- *val = amdgpu_device_gpu_recover(adev, NULL);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
+ flush_work(&adev->reset_work);
+
+ *val = atomic_read(&adev->reset_domain->reset_res);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -787,6 +936,24 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
"%lld\n");
+static void amdgpu_debugfs_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USER;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
#endif
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
@@ -798,9 +965,12 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
&amdgpu_debugfs_fence_info_fops);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev)) {
+
+ INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
&amdgpu_debugfs_gpu_recover_fops);
+ }
#endif
}