Diffstat (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c')
-rw-r--r--  drivers/gpu/drm/i915/gvt/scheduler.c  219
1 file changed, 127 insertions(+), 92 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 0fb1df71c637..63ad1fed525a 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -35,12 +35,18 @@
#include <linux/kthread.h>
+#include <drm/drm_print.h>
+
#include "gem/i915_gem_pm.h"
#include "gt/intel_context.h"
+#include "gt/intel_execlists_submission.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc.h"
#include "gt/intel_ring.h"
#include "i915_drv.h"
#include "i915_gem_gtt.h"
+#include "i915_perf_oa_regs.h"
#include "gvt.h"
#define RING_CTX_OFF(x) \
@@ -73,7 +79,7 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload)
}
/*
- * when populating shadow ctx from guest, we should not overrride oa related
+ * When populating shadow ctx from guest, we should not override oa related
* registers, so that they will not be overlapped by guest oa configs. Thus
* made it possible to capture oa data from host for both host and guests.
*/
@@ -135,6 +141,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
int i;
bool skip = false;
int ring_id = workload->engine->id;
+ int ret;
GEM_BUG_ON(!intel_context_is_pinned(ctx));
@@ -145,10 +152,10 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \
- intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+ intel_gvt_read_gpa(vgpu, workload->ring_context_gpa \
+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
#define COPY_REG_MASKED(name) {\
- intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+ intel_gvt_read_gpa(vgpu, workload->ring_context_gpa \
+ RING_CTX_OFF(name.val),\
&shadow_ring_context->name.val, 4);\
shadow_ring_context->name.val |= 0xffff << 16;\
@@ -161,16 +168,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
COPY_REG(bb_per_ctx_ptr);
COPY_REG(rcs_indirect_ctx);
COPY_REG(rcs_indirect_ctx_offset);
- }
+ } else if (workload->engine->id == BCS0)
+ intel_gvt_read_gpa(vgpu,
+ workload->ring_context_gpa +
+ BCS_TILE_REGISTER_VAL_OFFSET,
+ (void *)shadow_ring_context +
+ BCS_TILE_REGISTER_VAL_OFFSET, 4);
#undef COPY_REG
#undef COPY_REG_MASKED
- intel_gvt_hypervisor_read_gpa(vgpu,
+ /* don't copy Ring Context (the first 0x50 dwords),
+ * only copy the Engine Context part from guest
+ */
+ intel_gvt_read_gpa(vgpu,
workload->ring_context_gpa +
- sizeof(*shadow_ring_context),
+ RING_CTX_SIZE,
(void *)shadow_ring_context +
- sizeof(*shadow_ring_context),
- I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+ RING_CTX_SIZE,
+ I915_GTT_PAGE_SIZE - RING_CTX_SIZE);
sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
@@ -232,11 +247,16 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
continue;
read:
- intel_gvt_hypervisor_read_gpa(vgpu, gpa_base, dst, gpa_size);
+ intel_gvt_read_gpa(vgpu, gpa_base, dst, gpa_size);
gpa_base = context_gpa;
gpa_size = I915_GTT_PAGE_SIZE;
dst = context_base + (i << I915_GTT_PAGE_SHIFT);
}
+ ret = intel_gvt_scan_engine_context(workload);
+ if (ret) {
+ gvt_vgpu_err("invalid cmd found in guest context pages\n");
+ return ret;
+ }
s->last_ctx[ring_id].valid = true;
return 0;
}
@@ -348,7 +368,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
u32 *cs;
int err;
- if (IS_GEN(req->i915, 9) && is_inhibit_context(req->context))
+ if (GRAPHICS_VER(req->engine->i915) == 9 && is_inhibit_context(req->context))
intel_vgpu_restore_inhibit_context(vgpu, req);
/*
@@ -396,13 +416,23 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
if (!wa_ctx->indirect_ctx.obj)
return;
+ i915_gem_object_lock(wa_ctx->indirect_ctx.obj, NULL);
i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
+ i915_gem_object_unlock(wa_ctx->indirect_ctx.obj);
i915_gem_object_put(wa_ctx->indirect_ctx.obj);
wa_ctx->indirect_ctx.obj = NULL;
wa_ctx->indirect_ctx.shadow_va = NULL;
}
+static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr)
+{
+ struct scatterlist *sg = pd->pt.base->mm.pages->sgl;
+
+ /* This is not a good idea */
+ sg->dma_address = addr;
+}
+
static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
struct intel_context *ce)
{
@@ -411,7 +441,7 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
int i = 0;
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
- px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0];
+ set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]);
} else {
for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
struct i915_page_directory * const pd =
@@ -421,7 +451,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
shadow ppgtt. */
if (!pd)
break;
- px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
+
+ set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]);
}
}
}
@@ -495,12 +526,14 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
struct intel_gvt *gvt = workload->vgpu->gvt;
const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd;
struct intel_vgpu_shadow_bb *bb;
+ struct i915_gem_ww_ctx ww;
int ret;
list_for_each_entry(bb, &workload->shadow_bb, list) {
- /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
+ /*
+ * For privilege batch buffer and not wa_ctx, the bb_start_cmd_va
* is only updated into ring_scan_buffer, not real ring address
- * allocated in later copy_workload_to_ring_buffer. pls be noted
+ * allocated in later copy_workload_to_ring_buffer. Please be noted
* shadow_ring_buffer_va is now pointed to real ring buffer va
* in copy_workload_to_ring_buffer.
*/
@@ -509,28 +542,29 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+ bb->bb_offset;
- if (bb->ppgtt) {
- /* for non-priv bb, scan&shadow is only for
- * debugging purpose, so the content of shadow bb
- * is the same as original bb. Therefore,
- * here, rather than switch to shadow bb's gma
- * address, we directly use original batch buffer's
- * gma address, and send original bb to hardware
- * directly
- */
- if (bb->clflush & CLFLUSH_AFTER) {
- drm_clflush_virt_range(bb->va,
- bb->obj->base.size);
- bb->clflush &= ~CLFLUSH_AFTER;
- }
- i915_gem_object_finish_access(bb->obj);
- bb->accessing = false;
+ /*
+ * For non-priv bb, scan&shadow is only for
+ * debugging purpose, so the content of shadow bb
+ * is the same as original bb. Therefore,
+ * here, rather than switch to shadow bb's gma
+ * address, we directly use original batch buffer's
+ * gma address, and send original bb to hardware
+ * directly.
+ */
+ if (!bb->ppgtt) {
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ i915_gem_object_lock(bb->obj, &ww);
- } else {
- bb->vma = i915_gem_object_ggtt_pin(bb->obj,
- NULL, 0, 0, 0);
+ bb->vma = i915_gem_object_ggtt_pin_ww(bb->obj, &ww,
+ NULL, 0, 0, 0);
if (IS_ERR(bb->vma)) {
ret = PTR_ERR(bb->vma);
+ if (ret == -EDEADLK) {
+ ret = i915_gem_ww_ctx_backoff(&ww);
+ if (!ret)
+ goto retry;
+ }
goto err;
}
@@ -539,30 +573,19 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
if (gmadr_bytes == 8)
bb->bb_start_cmd_va[2] = 0;
- /* No one is going to touch shadow bb from now on. */
- if (bb->clflush & CLFLUSH_AFTER) {
- drm_clflush_virt_range(bb->va,
- bb->obj->base.size);
- bb->clflush &= ~CLFLUSH_AFTER;
- }
-
- ret = i915_gem_object_set_to_gtt_domain(bb->obj,
- false);
+ ret = i915_vma_move_to_active(bb->vma, workload->req,
+ __EXEC_OBJECT_NO_REQUEST_AWAIT);
if (ret)
goto err;
- ret = i915_vma_move_to_active(bb->vma,
- workload->req,
- 0);
- if (ret)
- goto err;
-
- i915_gem_object_finish_access(bb->obj);
- bb->accessing = false;
+ /* No one is going to touch shadow bb from now on. */
+ i915_gem_object_flush_map(bb->obj);
+ i915_gem_ww_ctx_fini(&ww);
}
}
return 0;
err:
+ i915_gem_ww_ctx_fini(&ww);
release_shadow_batch_buffer(workload);
return ret;
}
@@ -589,14 +612,29 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
unsigned char *per_ctx_va =
(unsigned char *)wa_ctx->indirect_ctx.shadow_va +
wa_ctx->indirect_ctx.size;
+ struct i915_gem_ww_ctx ww;
+ int ret;
if (wa_ctx->indirect_ctx.size == 0)
return 0;
- vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
- 0, CACHELINE_BYTES, 0);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ i915_gem_object_lock(wa_ctx->indirect_ctx.obj, &ww);
+
+ vma = i915_gem_object_ggtt_pin_ww(wa_ctx->indirect_ctx.obj, &ww, NULL,
+ 0, CACHELINE_BYTES, 0);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ if (ret == -EDEADLK) {
+ ret = i915_gem_ww_ctx_backoff(&ww);
+ if (!ret)
+ goto retry;
+ }
+ return ret;
+ }
+
+ i915_gem_ww_ctx_fini(&ww);
/* FIXME: we are not tracking our pinned VMA leaving it
* up to the core to fix up the stray pin_count upon
@@ -630,15 +668,14 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
if (bb->obj) {
- if (bb->accessing)
- i915_gem_object_finish_access(bb->obj);
-
+ i915_gem_object_lock(bb->obj, NULL);
if (bb->va && !IS_ERR(bb->va))
i915_gem_object_unpin_map(bb->obj);
if (bb->vma && !IS_ERR(bb->vma))
i915_vma_unpin(bb->vma);
+ i915_gem_object_unlock(bb->obj);
i915_gem_object_put(bb->obj);
}
list_del(&bb->list);
@@ -661,6 +698,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
!workload->shadow_mm->ppgtt_mm.shadowed) {
+ intel_vgpu_unpin_mm(workload->shadow_mm);
gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
return -EINVAL;
}
@@ -831,7 +869,8 @@ pick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine)
goto out;
}
- if (!scheduler->current_vgpu->active ||
+ if (!test_bit(INTEL_VGPU_STATUS_ACTIVE,
+ scheduler->current_vgpu->status) ||
list_empty(workload_q_head(scheduler->current_vgpu, engine)))
goto out;
@@ -876,8 +915,7 @@ static void update_guest_pdps(struct intel_vgpu *vgpu,
gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val);
for (i = 0; i < 8; i++)
- intel_gvt_hypervisor_write_gpa(vgpu,
- gpa + i * 8, &pdp[7 - i], 4);
+ intel_gvt_write_gpa(vgpu, gpa + i * 8, &pdp[7 - i], 4);
}
static __maybe_unused bool
@@ -972,13 +1010,13 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
continue;
write:
- intel_gvt_hypervisor_write_gpa(vgpu, gpa_base, src, gpa_size);
+ intel_gvt_write_gpa(vgpu, gpa_base, src, gpa_size);
gpa_base = context_gpa;
gpa_size = I915_GTT_PAGE_SIZE;
src = context_base + (i << I915_GTT_PAGE_SHIFT);
}
- intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
+ intel_gvt_write_gpa(vgpu, workload->ring_context_gpa +
RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
shadow_ring_context = (void *) ctx->lrc_reg_state;
@@ -993,7 +1031,7 @@ write:
}
#define COPY_REG(name) \
- intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
+ intel_gvt_write_gpa(vgpu, workload->ring_context_gpa + \
RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
COPY_REG(ctx_ctrl);
@@ -1001,7 +1039,7 @@ write:
#undef COPY_REG
- intel_gvt_hypervisor_write_gpa(vgpu,
+ intel_gvt_write_gpa(vgpu,
workload->ring_context_gpa +
sizeof(*shadow_ring_context),
(void *)shadow_ring_context +
@@ -1013,13 +1051,12 @@ void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
intel_engine_mask_t engine_mask)
{
struct intel_vgpu_submission *s = &vgpu->submission;
- struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
struct intel_engine_cs *engine;
struct intel_vgpu_workload *pos, *n;
intel_engine_mask_t tmp;
- /* free the unsubmited workloads in the queues. */
- for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
+ /* free the unsubmitted workloads in the queues. */
+ for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) {
list_for_each_entry_safe(pos, n,
&s->workload_q_head[engine->id], list) {
list_del_init(&pos->list);
@@ -1116,7 +1153,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
static int workload_thread(void *arg)
{
struct intel_engine_cs *engine = arg;
- const bool need_force_wake = INTEL_GEN(engine->i915) >= 9;
+ const bool need_force_wake = GRAPHICS_VER(engine->i915) >= 9;
struct intel_gvt *gvt = engine->i915->gvt;
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
struct intel_vgpu_workload *workload = NULL;
@@ -1263,13 +1300,13 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
int i;
if (i915_vm_is_4lvl(&ppgtt->vm)) {
- px_dma(ppgtt->pd) = s->i915_context_pml4;
+ set_dma_address(ppgtt->pd, s->i915_context_pml4);
} else {
for (i = 0; i < GEN8_3LVL_PDPES; i++) {
struct i915_page_directory * const pd =
i915_pd_entry(ppgtt->pd, i);
- px_dma(pd) = s->i915_context_pdps[i];
+ set_dma_address(pd, s->i915_context_pdps[i]);
}
}
}
@@ -1291,7 +1328,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm));
for_each_engine(engine, vgpu->gvt->gt, id)
- intel_context_unpin(s->shadow[id]);
+ intel_context_put(s->shadow[id]);
kmem_cache_destroy(s->workloads);
}
@@ -1354,7 +1391,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
enum intel_engine_id i;
int ret;
- ppgtt = i915_ppgtt_create(&i915->gt);
+ ppgtt = i915_ppgtt_create(to_gt(i915), I915_BO_ALLOC_PM_EARLY);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@@ -1377,16 +1414,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
intel_context_set_single_submission(ce);
/* Max ring buffer size */
- if (!intel_uc_wants_guc_submission(&engine->gt->uc)) {
- const unsigned int ring_size = 512 * SZ_4K;
-
- ce->ring = __intel_context_ring_size(ring_size);
- }
-
- ret = intel_context_pin(ce);
- intel_context_put(ce);
- if (ret)
- goto out_shadow_ctx;
+ if (!intel_uc_wants_guc_submission(&engine->gt->uc))
+ ce->ring_size = SZ_2M;
s->shadow[i] = ce;
}
@@ -1419,7 +1448,6 @@ out_shadow_ctx:
if (IS_ERR(s->shadow[i]))
break;
- intel_context_unpin(s->shadow[i]);
intel_context_put(s->shadow[i]);
}
i915_vm_put(&ppgtt->vm);
@@ -1493,6 +1521,7 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu_submission *s = &workload->vgpu->submission;
+ intel_context_unpin(s->shadow[workload->engine->id]);
release_shadow_batch_buffer(workload);
release_shadow_wa_ctx(&workload->wa_ctx);
@@ -1547,7 +1576,7 @@ static void read_guest_pdps(struct intel_vgpu *vgpu,
gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val);
for (i = 0; i < 8; i++)
- intel_gvt_hypervisor_read_gpa(vgpu,
+ intel_gvt_read_gpa(vgpu,
gpa + i * 8, &pdp[7 - i], 4);
}
@@ -1618,10 +1647,10 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu,
return ERR_PTR(-EINVAL);
}
- intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+ intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(ring_header.val), &head, 4);
- intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+ intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(ring_tail.val), &tail, 4);
guest_head = head;
@@ -1648,11 +1677,11 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu,
gvt_dbg_el("ring %s begin a new workload\n", engine->name);
/* record some ring buffer register values for scan and shadow */
- intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+ intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(rb_start.val), &start, 4);
- intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+ intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
- intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+ intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
if (!intel_gvt_ggtt_validate_range(vgpu, start,
@@ -1675,9 +1704,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu,
workload->rb_ctl = ctl;
if (engine->id == RCS0) {
- intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+ intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
- intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+ intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);
workload->wa_ctx.indirect_ctx.guest_gma =
@@ -1738,11 +1767,17 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu,
return ERR_PTR(ret);
}
+ ret = intel_context_pin(s->shadow[engine->id]);
+ if (ret) {
+ intel_vgpu_destroy_workload(workload);
+ return ERR_PTR(ret);
+ }
+
return workload;
}
/**
- * intel_vgpu_queue_workload - Qeue a vGPU workload
+ * intel_vgpu_queue_workload - Queue a vGPU workload
* @workload: the workload to queue in
*/
void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload)
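
Note: several hunks above convert plain i915_gem_object_ggtt_pin() calls into the ww-mutex aware i915_gem_object_ggtt_pin_ww(), wrapped in an acquire-context retry loop so that an -EDEADLK hit under lock contention is resolved by backing off and retrying rather than failing the workload. Below is a minimal sketch of that idiom, distilled from the hunks above; the helper name and the bare object pointer are illustrative only and not part of this patch.

/* Sketch of the ww acquire/backoff retry pattern used in this diff.
 * Assumes the usual i915 GEM headers (gem/i915_gem_object.h, i915_gem_ww.h).
 */
static int pin_obj_ggtt_ww(struct drm_i915_gem_object *obj,
			   struct i915_vma **out_vma)
{
	struct i915_gem_ww_ctx ww;
	struct i915_vma *vma;
	int ret = 0;

	/* Start a wait/wound acquire context covering the locks taken below. */
	i915_gem_ww_ctx_init(&ww, false);
retry:
	i915_gem_object_lock(obj, &ww);

	vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		if (ret == -EDEADLK) {
			/* Lost a lock race: drop everything held in ww and retry. */
			ret = i915_gem_ww_ctx_backoff(&ww);
			if (!ret)
				goto retry;
		}
	} else {
		/* The pin outlives the object lock, so it is safe to hand back. */
		*out_vma = vma;
	}

	/* Unlocks any objects still held in this acquire context. */
	i915_gem_ww_ctx_fini(&ww);
	return ret;
}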