summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_request.c
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2020-10-09 12:56:02 -0300
committerJason Gunthorpe <jgg@nvidia.com>2020-10-16 12:40:58 -0300
commit16e7483e6f02973972f832b18042fd6c45fe26c0 (patch)
tree205122d1996a983619ccd55c572c34f037a76718 /drivers/gpu/drm/i915/i915_request.c
parentbf6a47644ea0928b2a6589ba9fb1221116d8bfaf (diff)
parent0c16d9635e3a51377e5815b9f8e14f497a4dbb42 (diff)
Merge branch 'dynamic_sg' into rdma.git for-next
From Maor Gottlieb says: ==================== This series extends __sg_alloc_table_from_pages to allow chaining of new pages to an already initialized SG table. This allows for drivers to utilize the optimization of merging contiguous pages without a need to pre allocate all the pages and hold them in a very large temporary buffer prior to the call to SG table initialization. The last patch changes the Infiniband core to use the new API. It removes duplicate functionality from the code and benefits from the optimization of allocating dynamic SG table from pages. In huge pages system of 2MB page size, without this change, the SG table would contain x512 SG entries. ==================== * branch 'dynamic_sg': RDMA/umem: Move to allocate SG table from pages lib/scatterlist: Add support in dynamic allocation of SG table from pages tools/testing/scatterlist: Show errors in human readable form tools/testing/scatterlist: Rejuvenate bit-rotten test
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r--drivers/gpu/drm/i915/i915_request.c25
1 files changed, 23 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0b2fe55e6194..781a6783affe 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -388,17 +388,38 @@ static bool __request_in_flight(const struct i915_request *signal)
* As we know that there are always preemption points between
* requests, we know that only the currently executing request
* may be still active even though we have cleared the flag.
- * However, we can't rely on our tracking of ELSP[0] to known
+ * However, we can't rely on our tracking of ELSP[0] to know
* which request is currently active and so maybe stuck, as
* the tracking maybe an event behind. Instead assume that
* if the context is still inflight, then it is still active
* even if the active flag has been cleared.
+ *
+ * To further complicate matters, if there a pending promotion, the HW
+ * may either perform a context switch to the second inflight execlists,
+ * or it may switch to the pending set of execlists. In the case of the
+ * latter, it may send the ACK and we process the event copying the
+ * pending[] over top of inflight[], _overwriting_ our *active. Since
+ * this implies the HW is arbitrating and not struck in *active, we do
+ * not worry about complete accuracy, but we do require no read/write
+ * tearing of the pointer [the read of the pointer must be valid, even
+ * as the array is being overwritten, for which we require the writes
+ * to avoid tearing.]
+ *
+ * Note that the read of *execlists->active may race with the promotion
+ * of execlists->pending[] to execlists->inflight[], overwritting
+ * the value at *execlists->active. This is fine. The promotion implies
+ * that we received an ACK from the HW, and so the context is not
+ * stuck -- if we do not see ourselves in *active, the inflight status
+ * is valid. If instead we see ourselves being copied into *active,
+ * we are inflight and may signal the callback.
*/
if (!intel_context_inflight(signal->context))
return false;
rcu_read_lock();
- for (port = __engine_active(signal->engine); (rq = *port); port++) {
+ for (port = __engine_active(signal->engine);
+ (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
+ port++) {
if (rq->context == signal->context) {
inflight = i915_seqno_passed(rq->fence.seqno,
signal->fence.seqno);