From d2b4b97933f5adacfba42dc3b9200d0e21fbe2c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:33 +0000 Subject: drm/i915: Record the default hw state after reset upon load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take a copy of the HW state after a reset upon module loading by executing a context switch from a blank context to the kernel context, thus saving the default hw state over the blank context image. We can then use the default hw state to initialise any future context, ensuring that each starts with the default view of hw state. v2: Unmap our default state from the GTT after stealing it from the context. This should stop us from accidentally overwriting it via the GTT (and frees up some precious GTT space). Testcase: igt/gem_ctx_isolation Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f6ded475bb2c..42cc61230ca7 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -723,8 +723,6 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) if (IS_ERR(vgpu->shadow_ctx)) return PTR_ERR(vgpu->shadow_ctx); - vgpu->shadow_ctx->engine[RCS].initialised = true; - bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); return 0; -- cgit From 874b6a910e6cc094629bd2634d14061cf5eb7690 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 20:08:18 +0800 Subject: drm/i915/gvt: Rename intel_vgpu_{init, clean}_gvt_context() To move workload related functions into scheduler.c, an expected way is to collect all the init/clean functions related to vGPU workload submission into fewer functions. Rename intel_vgpu_{init, clean}_gvt_context() for above usage in future. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 42cc61230ca7..5913bcb7b73e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -709,12 +709,29 @@ err: return ret; } -void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu) +/** + * intel_vgpu_clean_submission - free submission-related resource for vGPU + * @vgpu: a vGPU + * + * This function is called when a vGPU is being destroyed. + * + */ +void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { i915_gem_context_put(vgpu->shadow_ctx); } -int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) +/** + * intel_vgpu_setup_submission - setup submission-related resource for vGPU + * @vgpu: a vGPU + * + * This function is called when a vGPU is being created. + * + * Returns: + * Zero on success, negative error code if failed. 
+ * + */ +int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { atomic_set(&vgpu->running_workload_num, 0); -- cgit From 9a9829e9eb8bc4b4e870ce15a8904a32991608d5 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 20:28:09 +0800 Subject: drm/i915/gvt: Move workload cache init/clean into intel_vgpu_{setup, clean}_submission() Move vGPU workload cache initialization/de-initialization into intel_vgpu_{setup, clean}_submission() since they are not specific to execlist stuffs. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 5913bcb7b73e..81952139b00c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -719,6 +719,7 @@ err: void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { i915_gem_context_put(vgpu->shadow_ctx); + kmem_cache_destroy(vgpu->workloads); } /** @@ -733,7 +734,9 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) */ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { - atomic_set(&vgpu->running_workload_num, 0); + enum intel_engine_id i; + struct intel_engine_cs *engine; + int ret; vgpu->shadow_ctx = i915_gem_context_create_gvt( &vgpu->gvt->dev_priv->drm); @@ -742,5 +745,24 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); + vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload", + sizeof(struct intel_vgpu_workload), 0, + SLAB_HWCACHE_ALIGN, + NULL); + + if (!vgpu->workloads) { + ret = -ENOMEM; + goto out_shadow_ctx; + } + + for_each_engine(engine, vgpu->gvt->dev_priv, i) + INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + + atomic_set(&vgpu->running_workload_num, 0); + return 0; + +out_shadow_ctx: + i915_gem_context_put(vgpu->shadow_ctx); + return ret; } -- cgit From 1406a14b0ed977fc18f43398b391e4bb5d744174 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 21:15:18 +0800 Subject: drm/i915/gvt: Introduce intel_vgpu_submission Introduce intel_vgpu_submission to hold all members related to submission in struct intel_vgpu before. 
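For readers tracking the refactor, here is a rough sketch of what struct intel_vgpu_submission ends up holding, inferred purely from the fields the following diffs touch (shadow_ctx, shadow_ctx_desc_updated, workloads, workload_q_head, running_workload_num, and, in later patches, tlb_handle_pending and the submission ops). The authoritative definition lives in a header outside this excerpt, so the member types below are an approximation, not the actual declaration.

struct intel_vgpu_submission {
        struct i915_gem_context *shadow_ctx;
        DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
        DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); /* added later in the series */
        struct kmem_cache *workloads;                  /* per-vGPU workload slab */
        struct list_head workload_q_head[I915_NUM_ENGINES];
        atomic_t running_workload_num;
        /* introduced by the submission-ops patches further down: */
        const struct intel_vgpu_submission_ops *ops;
        int virtual_submission_interface;
        bool active;
};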
Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 65 ++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 81952139b00c..864a2bc06e45 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -57,7 +57,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; + struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; @@ -249,12 +249,13 @@ void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) */ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; - struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct drm_i915_gem_request *rq; - struct intel_vgpu *vgpu = workload->vgpu; struct intel_ring *ring; int ret; @@ -267,7 +268,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (!test_and_set_bit(ring_id, vgpu->shadow_ctx_desc_updated)) + if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated)) shadow_context_descriptor_update(shadow_ctx, dev_priv->engine[ring_id]); @@ -326,9 +327,11 @@ err_scan: static int dispatch_workload(struct intel_vgpu_workload *workload) { + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; - struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; int ret = 0; @@ -414,7 +417,7 @@ static struct intel_vgpu_workload *pick_next_workload( gvt_dbg_sched("ring id %d pick new workload %p\n", ring_id, workload); - atomic_inc(&workload->vgpu->running_workload_num); + atomic_inc(&workload->vgpu->submission.running_workload_num); out: mutex_unlock(&gvt->lock); return workload; @@ -424,8 +427,9 @@ static void update_guest_context(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; @@ -491,15 +495,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload) static void complete_current_workload(struct intel_gvt *gvt, int ring_id) { 
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - struct intel_vgpu_workload *workload; - struct intel_vgpu *vgpu; + struct intel_vgpu_workload *workload = + scheduler->current_workload[ring_id]; + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; int event; mutex_lock(&gvt->lock); - workload = scheduler->current_workload[ring_id]; - vgpu = workload->vgpu; - /* For the workload w/ request, needs to wait for the context * switch to make sure request is completed. * For the workload w/o request, directly complete the workload. @@ -536,7 +539,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) } mutex_lock(&dev_priv->drm.struct_mutex); /* unpin shadow ctx as the shadow_ctx update is done */ - engine->context_unpin(engine, workload->vgpu->shadow_ctx); + engine->context_unpin(engine, s->shadow_ctx); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -548,7 +551,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) list_del_init(&workload->list); workload->complete(workload); - atomic_dec(&vgpu->running_workload_num); + atomic_dec(&s->running_workload_num); wake_up(&scheduler->workload_complete_wq); if (gvt->scheduler.need_reschedule) @@ -637,14 +640,15 @@ complete: void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu) { + struct intel_vgpu_submission *s = &vgpu->submission; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - if (atomic_read(&vgpu->running_workload_num)) { + if (atomic_read(&s->running_workload_num)) { gvt_dbg_sched("wait vgpu idle\n"); wait_event(scheduler->workload_complete_wq, - !atomic_read(&vgpu->running_workload_num)); + !atomic_read(&s->running_workload_num)); } } @@ -718,8 +722,10 @@ err: */ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { - i915_gem_context_put(vgpu->shadow_ctx); - kmem_cache_destroy(vgpu->workloads); + struct intel_vgpu_submission *s = &vgpu->submission; + + i915_gem_context_put(s->shadow_ctx); + kmem_cache_destroy(s->workloads); } /** @@ -734,35 +740,36 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) */ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { + struct intel_vgpu_submission *s = &vgpu->submission; enum intel_engine_id i; struct intel_engine_cs *engine; int ret; - vgpu->shadow_ctx = i915_gem_context_create_gvt( + s->shadow_ctx = i915_gem_context_create_gvt( &vgpu->gvt->dev_priv->drm); - if (IS_ERR(vgpu->shadow_ctx)) - return PTR_ERR(vgpu->shadow_ctx); + if (IS_ERR(s->shadow_ctx)) + return PTR_ERR(s->shadow_ctx); - bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); + bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); - vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload", + s->workloads = kmem_cache_create("gvt-g_vgpu_workload", sizeof(struct intel_vgpu_workload), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!vgpu->workloads) { + if (!s->workloads) { ret = -ENOMEM; goto out_shadow_ctx; } for_each_engine(engine, vgpu->gvt->dev_priv, i) - INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + INIT_LIST_HEAD(&s->workload_q_head[i]); - atomic_set(&vgpu->running_workload_num, 0); + atomic_set(&s->running_workload_num, 0); return 0; out_shadow_ctx: - i915_gem_context_put(vgpu->shadow_ctx); + i915_gem_context_put(s->shadow_ctx); return ret; } -- cgit From 91d5d85442b2a65e5f4e1726565c1c1a8ba9976f Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 21:33:20 +0800 Subject: drm/i915/gvt: Move tlb_handle_pending into intel_vgpu_submission Move 
tlb_handle_pending into intel_vgpu_submssion since it belongs to a part of vGPU submission stuffs Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 864a2bc06e45..7cb1cf4223ed 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -766,6 +766,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) INIT_LIST_HEAD(&s->workload_q_head[i]); atomic_set(&s->running_workload_num, 0); + bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); return 0; -- cgit From 5c56883a9531cd89561fb9a11a33697f2847c82a Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 20 Sep 2017 05:36:47 +0800 Subject: drm/i915/gvt: Change the return type during command scan Generally, there are 3 types of errors during command scan: a) some commands might be unknown with EBADRQC; b) some cmd access invalid address with EFAULT; c) some unexpected force nonpriv cmd with EPERM. later the healthy state can be judged through the return error. v2: - remove some internal i915 errors rating. (Zhenyu) v3: - the healthy state is judged through the internal defined return error. (Zhenyu) - force non priv cmd error can be ignored. (Kevin) v4: - reuse standard defined errno instead of recreate, e.g EBADRQC for unknown cmd, EFAULT for invalid address, EPERM for nonpriv. (Zhenyu) v5: - remove some irrelevant code for the patch. - fix typo of vgpu_is_vm_unhealthy. (Zhenyu) v6: - move the healthy check and failsafe code into another patch. (Zhenyu) v7: - polish title and commit message. (Zhenyu) Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 7cb1cf4223ed..0771b715f825 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -84,7 +84,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("Invalid guest context descriptor\n"); - return -EINVAL; + return -EFAULT; } page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); -- cgit From e011c6ce2b4fc7c577ade41485d74431a4e6ea1a Mon Sep 17 00:00:00 2001 From: fred gao Date: Tue, 19 Sep 2017 15:11:28 +0800 Subject: drm/i915/gvt: Add VM healthy check for workload_thread When a scan error occurs in dispatch_workload, this patch is to check the healthy state and free all the queued workloads before the failsafe mode is entered. 
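The vgpu_is_vm_unhealthy() check used below is defined outside this file; going by the error-code convention established in the previous patch (-EBADRQC for an unknown command, -EFAULT for an invalid guest address, -EPERM for an unexpected force-nonpriv command), a minimal helper along the following lines is assumed, with only the invalid-address case treated as a guest hang.

/* Assumed shape of the helper (lives in a GVT header, not in this diff):
 * only -EFAULT, i.e. a guest command touching an invalid address, marks the
 * VM as unhealthy; -EBADRQC and -EPERM are tolerated. */
static inline bool vgpu_is_vm_unhealthy(int ret)
{
        return ret == -EFAULT;
}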
Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 0771b715f825..02af14023383 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -634,6 +634,13 @@ complete: FORCEWAKE_ALL); intel_runtime_pm_put(gvt->dev_priv); + if (ret && (vgpu_is_vm_unhealthy(ret))) { + mutex_lock(&gvt->lock); + intel_vgpu_clean_execlist(vgpu); + mutex_unlock(&gvt->lock); + enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); + } + } return 0; } -- cgit From 21527a8dafc40fc499ae57492c1c5d0098cbcf08 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 21:42:09 +0800 Subject: drm/i915/gvt: Factor out vGPU workload creation/destroy Factor out vGPU workload creation/destroy functions since they are not specific to execlist emulation. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 51 ++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 02af14023383..10ccb05d0e8d 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -781,3 +781,54 @@ out_shadow_ctx: i915_gem_context_put(s->shadow_ctx); return ret; } + +/** + * intel_vgpu_destroy_workload - destroy a vGPU workload + * @vgpu: a vGPU + * + * This function is called when destroy a vGPU workload. + * + */ +void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu_submission *s = &workload->vgpu->submission; + + if (workload->shadow_mm) + intel_gvt_mm_unreference(workload->shadow_mm); + + kmem_cache_free(s->workloads, workload); +} + +/** + * intel_vgpu_create_workload - create a vGPU workload + * @vgpu: a vGPU + * + * This function is called when creating a vGPU workload. + * + * Returns: + * struct intel_vgpu_workload * on success, negative error code in + * pointer if failed. + * + */ +struct intel_vgpu_workload * +intel_vgpu_create_workload(struct intel_vgpu *vgpu) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_workload *workload; + + workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL); + if (!workload) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&workload->list); + INIT_LIST_HEAD(&workload->shadow_bb); + + init_waitqueue_head(&workload->shadow_ctx_status_wq); + atomic_set(&workload->shadow_ctx_active, 0); + + workload->status = -EINPROGRESS; + workload->shadowed = false; + workload->vgpu = vgpu; + + return workload; +} -- cgit From 497aa3f5e3bdb6bea5994f7075e2f2df2377d70e Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 21:51:10 +0800 Subject: drm/i915/gvt: Factor out prepare_workload() Factor out prepare_workload() for the following re-factor. 
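As a sketch of how a submission backend is meant to consume the helpers factored out above (the caller name and the queueing step are illustrative, not taken from these diffs; locking and waking the scheduler are omitted, and on a later failure the workload would go back through intel_vgpu_destroy_workload()):

/* Hypothetical caller: allocate a workload, fill in backend-specific state,
 * then queue it on the per-ring list that the setup path initialised. */
static int example_submit(struct intel_vgpu *vgpu, int ring_id)
{
        struct intel_vgpu_workload *workload;

        workload = intel_vgpu_create_workload(vgpu);
        if (IS_ERR(workload))
                return PTR_ERR(workload);

        workload->ring_id = ring_id;
        /* ... ctx_desc, ring-buffer values, prepare/complete callbacks ... */

        list_add_tail(&workload->list,
                      &vgpu->submission.workload_q_head[ring_id]);
        return 0;
}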
Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 10ccb05d0e8d..3d1435f55c7b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -325,6 +325,16 @@ err_scan: return ret; } +static int prepare_workload(struct intel_vgpu_workload *workload) +{ + int ret = 0; + + if (workload->prepare) + ret = workload->prepare(workload); + + return ret; +} + static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; @@ -344,12 +354,10 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) if (ret) goto out; - if (workload->prepare) { - ret = workload->prepare(workload); - if (ret) { - engine->context_unpin(engine, shadow_ctx); - goto out; - } + ret = prepare_workload(workload); + if (ret) { + engine->context_unpin(engine, shadow_ctx); + goto out; } out: -- cgit From d8235b5e55845de19983cec38af245cc200b81e2 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 22:06:39 +0800 Subject: drm/i915/gvt: Move common workload preparation into prepare_workload() Move common workload preparation into prepare_workload() in scheduler.c, as they are not specific to execlist emulation. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 152 ++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 3d1435f55c7b..a7a67cc65a07 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -325,13 +325,157 @@ err_scan: return ret; } +static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) +{ + struct intel_gvt *gvt = workload->vgpu->gvt; + const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd; + struct intel_shadow_bb_entry *entry_obj; + + /* pin the gem object to ggtt */ + list_for_each_entry(entry_obj, &workload->shadow_bb, list) { + struct i915_vma *vma; + + vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + /* FIXME: we are not tracking our pinned VMA leaving it + * up to the core to fix up the stray pin_count upon + * free. 
+ */ + + /* update the relocate gma with shadow batch buffer*/ + entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma); + if (gmadr_bytes == 8) + entry_obj->bb_start_cmd_va[2] = 0; + } + return 0; +} + +static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) +{ + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + int ring_id = workload->ring_id; + struct intel_vgpu_submission *s = &workload->vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_gem_object *ctx_obj = + shadow_ctx->engine[ring_id].state->obj; + struct execlist_ring_context *shadow_ring_context; + struct page *page; + + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); + shadow_ring_context = kmap_atomic(page); + + shadow_ring_context->bb_per_ctx_ptr.val = + (shadow_ring_context->bb_per_ctx_ptr.val & + (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma; + shadow_ring_context->rcs_indirect_ctx.val = + (shadow_ring_context->rcs_indirect_ctx.val & + (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; + + kunmap_atomic(shadow_ring_context); + return 0; +} + +static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +{ + struct i915_vma *vma; + unsigned char *per_ctx_va = + (unsigned char *)wa_ctx->indirect_ctx.shadow_va + + wa_ctx->indirect_ctx.size; + + if (wa_ctx->indirect_ctx.size == 0) + return 0; + + vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, + 0, CACHELINE_BYTES, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + /* FIXME: we are not tracking our pinned VMA leaving it + * up to the core to fix up the stray pin_count upon + * free. + */ + + wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma); + + wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1); + memset(per_ctx_va, 0, CACHELINE_BYTES); + + update_wa_ctx_2_shadow_ctx(wa_ctx); + return 0; +} + +static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) +{ + /* release all the shadow batch buffer */ + if (!list_empty(&workload->shadow_bb)) { + struct intel_shadow_bb_entry *entry_obj = + list_first_entry(&workload->shadow_bb, + struct intel_shadow_bb_entry, + list); + struct intel_shadow_bb_entry *temp; + + list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb, + list) { + i915_gem_object_unpin_map(entry_obj->obj); + i915_gem_object_put(entry_obj->obj); + list_del(&entry_obj->list); + kfree(entry_obj); + } + } +} + static int prepare_workload(struct intel_vgpu_workload *workload) { + struct intel_vgpu *vgpu = workload->vgpu; int ret = 0; - if (workload->prepare) + ret = intel_vgpu_pin_mm(workload->shadow_mm); + if (ret) { + gvt_vgpu_err("fail to vgpu pin mm\n"); + return ret; + } + + ret = intel_vgpu_sync_oos_pages(workload->vgpu); + if (ret) { + gvt_vgpu_err("fail to vgpu sync oos pages\n"); + goto err_unpin_mm; + } + + ret = intel_vgpu_flush_post_shadow(workload->vgpu); + if (ret) { + gvt_vgpu_err("fail to flush post shadow\n"); + goto err_unpin_mm; + } + + ret = prepare_shadow_batch_buffer(workload); + if (ret) { + gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); + goto err_unpin_mm; + } + + ret = prepare_shadow_wa_ctx(&workload->wa_ctx); + if (ret) { + gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n"); + goto err_shadow_batch; + } + + if (workload->prepare) { ret = workload->prepare(workload); + if (ret) + goto err_shadow_wa_ctx; + } + return 0; +err_shadow_wa_ctx: + release_shadow_wa_ctx(&workload->wa_ctx); +err_shadow_batch: + release_shadow_batch_buffer(workload); 
+err_unpin_mm: + intel_vgpu_unpin_mm(workload->shadow_mm); return ret; } @@ -557,6 +701,12 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) scheduler->current_workload[ring_id] = NULL; list_del_init(&workload->list); + + if (!workload->status) { + release_shadow_batch_buffer(workload); + release_shadow_wa_ctx(&workload->wa_ctx); + } + workload->complete(workload); atomic_dec(&s->running_workload_num); -- cgit From 6d76303553bab75ffc53993c56aad06251d8de60 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 22:33:12 +0800 Subject: drm/i915/gvt: Move common vGPU workload creation into scheduler.c Move common vGPU workload creation functions into scheduler.c since they are not specific to execlist emulation. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 196 +++++++++++++++++++++++++++++++---- 1 file changed, 178 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a7a67cc65a07..69893f29ff6d 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -792,13 +792,8 @@ complete: FORCEWAKE_ALL); intel_runtime_pm_put(gvt->dev_priv); - if (ret && (vgpu_is_vm_unhealthy(ret))) { - mutex_lock(&gvt->lock); - intel_vgpu_clean_execlist(vgpu); - mutex_unlock(&gvt->lock); + if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); - } - } return 0; } @@ -957,9 +952,90 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) kmem_cache_free(s->workloads, workload); } +static struct intel_vgpu_workload * +alloc_workload(struct intel_vgpu *vgpu) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_workload *workload; + + workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL); + if (!workload) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&workload->list); + INIT_LIST_HEAD(&workload->shadow_bb); + + init_waitqueue_head(&workload->shadow_ctx_status_wq); + atomic_set(&workload->shadow_ctx_active, 0); + + workload->status = -EINPROGRESS; + workload->shadowed = false; + workload->vgpu = vgpu; + + return workload; +} + +#define RING_CTX_OFF(x) \ + offsetof(struct execlist_ring_context, x) + +static void read_guest_pdps(struct intel_vgpu *vgpu, + u64 ring_context_gpa, u32 pdp[8]) +{ + u64 gpa; + int i; + + gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val); + + for (i = 0; i < 8; i++) + intel_gvt_hypervisor_read_gpa(vgpu, + gpa + i * 8, &pdp[7 - i], 4); +} + +static int prepare_mm(struct intel_vgpu_workload *workload) +{ + struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; + struct intel_vgpu_mm *mm; + struct intel_vgpu *vgpu = workload->vgpu; + int page_table_level; + u32 pdp[8]; + + if (desc->addressing_mode == 1) { /* legacy 32-bit */ + page_table_level = 3; + } else if (desc->addressing_mode == 3) { /* legacy 64 bit */ + page_table_level = 4; + } else { + gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); + return -EINVAL; + } + + read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp); + + mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp); + if (mm) { + intel_gvt_mm_reference(mm); + } else { + + mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT, + pdp, page_table_level, 0); + if (IS_ERR(mm)) { + gvt_vgpu_err("fail to create mm object.\n"); + return PTR_ERR(mm); + } + } + workload->shadow_mm = mm; + return 0; +} + +#define same_context(a, b) 
(((a)->context_id == (b)->context_id) && \ + ((a)->lrca == (b)->lrca)) + +#define get_last_workload(q) \ + (list_empty(q) ? NULL : container_of(q->prev, \ + struct intel_vgpu_workload, list)) /** * intel_vgpu_create_workload - create a vGPU workload * @vgpu: a vGPU + * @desc: a guest context descriptor * * This function is called when creating a vGPU workload. * @@ -969,24 +1045,108 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) * */ struct intel_vgpu_workload * -intel_vgpu_create_workload(struct intel_vgpu *vgpu) +intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, + struct execlist_ctx_descriptor_format *desc) { struct intel_vgpu_submission *s = &vgpu->submission; - struct intel_vgpu_workload *workload; + struct list_head *q = workload_q_head(vgpu, ring_id); + struct intel_vgpu_workload *last_workload = get_last_workload(q); + struct intel_vgpu_workload *workload = NULL; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u64 ring_context_gpa; + u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; + int ret; - workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL); - if (!workload) - return ERR_PTR(-ENOMEM); + ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, + (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT)); + if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); + return ERR_PTR(-EINVAL); + } - INIT_LIST_HEAD(&workload->list); - INIT_LIST_HEAD(&workload->shadow_bb); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ring_header.val), &head, 4); - init_waitqueue_head(&workload->shadow_ctx_status_wq); - atomic_set(&workload->shadow_ctx_active, 0); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ring_tail.val), &tail, 4); - workload->status = -EINPROGRESS; - workload->shadowed = false; - workload->vgpu = vgpu; + head &= RB_HEAD_OFF_MASK; + tail &= RB_TAIL_OFF_MASK; + + if (last_workload && same_context(&last_workload->ctx_desc, desc)) { + gvt_dbg_el("ring id %d cur workload == last\n", ring_id); + gvt_dbg_el("ctx head %x real head %lx\n", head, + last_workload->rb_tail); + /* + * cannot use guest context head pointer here, + * as it might not be updated at this time + */ + head = last_workload->rb_tail; + } + + gvt_dbg_el("ring id %d begin a new workload\n", ring_id); + + /* record some ring buffer register values for scan and shadow */ + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rb_start.val), &start, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rb_ctrl.val), &ctl, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); + + workload = alloc_workload(vgpu); + if (IS_ERR(workload)) + return workload; + + workload->ring_id = ring_id; + workload->ctx_desc = *desc; + workload->ring_context_gpa = ring_context_gpa; + workload->rb_head = head; + workload->rb_tail = tail; + workload->rb_start = start; + workload->rb_ctl = ctl; + + if (ring_id == RCS) { + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4); + + workload->wa_ctx.indirect_ctx.guest_gma = + indirect_ctx & INDIRECT_CTX_ADDR_MASK; + workload->wa_ctx.indirect_ctx.size = + (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * + CACHELINE_BYTES; + workload->wa_ctx.per_ctx.guest_gma = + per_ctx & PER_CTX_ADDR_MASK; + 
workload->wa_ctx.per_ctx.valid = per_ctx & 1; + } + + gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", + workload, ring_id, head, tail, start, ctl); + + ret = prepare_mm(workload); + if (ret) { + kmem_cache_free(s->workloads, workload); + return ERR_PTR(ret); + } + + /* Only scan and shadow the first workload in the queue + * as there is only one pre-allocated buf-obj for shadow. + */ + if (list_empty(workload_q_head(vgpu, ring_id))) { + intel_runtime_pm_get(dev_priv); + mutex_lock(&dev_priv->drm.struct_mutex); + ret = intel_gvt_scan_and_shadow_workload(workload); + mutex_unlock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_put(dev_priv); + } + + if (ret && (vgpu_is_vm_unhealthy(ret))) { + enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); + intel_vgpu_destroy_workload(workload); + return ERR_PTR(ret); + } return workload; } -- cgit From ad1d36369b07f6b9db81897802ee5d8764eaa922 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Wed, 13 Sep 2017 00:31:29 +0800 Subject: drm/i915/gvt: Introduce vGPU submission ops Introduce vGPU submission ops to support easy switching submission mode of one vGPU between different OSes. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 69893f29ff6d..f3be88fa88dd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -884,6 +884,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { struct intel_vgpu_submission *s = &vgpu->submission; + intel_vgpu_select_submission_ops(vgpu, 0); i915_gem_context_put(s->shadow_ctx); kmem_cache_destroy(s->workloads); } @@ -935,6 +936,58 @@ out_shadow_ctx: return ret; } +/** + * intel_vgpu_select_submission_ops - select virtual submission interface + * @vgpu: a vGPU + * @interface: expected vGPU virtual submission interface + * + * This function is called when guest configures submission interface. + * + * Returns: + * Zero on success, negative error code if failed. + * + */ +int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, + unsigned int interface) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + const struct intel_vgpu_submission_ops *ops[] = { + [INTEL_VGPU_EXECLIST_SUBMISSION] = + &intel_vgpu_execlist_submission_ops, + }; + int ret; + + if (WARN_ON(interface >= ARRAY_SIZE(ops))) + return -EINVAL; + + if (s->active) { + s->ops->clean(vgpu); + s->active = false; + gvt_dbg_core("vgpu%d: de-select ops [ %s ] \n", + vgpu->id, s->ops->name); + } + + if (interface == 0) { + s->ops = NULL; + s->virtual_submission_interface = 0; + gvt_dbg_core("vgpu%d: no submission ops\n", vgpu->id); + return 0; + } + + ret = ops[interface]->init(vgpu); + if (ret) + return ret; + + s->ops = ops[interface]; + s->virtual_submission_interface = interface; + s->active = true; + + gvt_dbg_core("vgpu%d: activate ops [ %s ]\n", + vgpu->id, s->ops->name); + + return 0; +} + /** * intel_vgpu_destroy_workload - destroy a vGPU workload * @vgpu: a vGPU -- cgit From 06bb372f9ace47296aeaaca8e130d948ea2855cf Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Wed, 13 Sep 2017 01:41:35 +0800 Subject: drm/i915/gvt: Introduce intel_vgpu_reset_submission Introduce an generic API to reset vGPU virtual submission interface. 
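From the way intel_vgpu_select_submission_ops() and intel_vgpu_reset_submission() drive s->ops (ops->name, ops->init, ops->clean, ops->reset), the interface being introduced looks roughly like this; the real definition sits in a header outside this diff, so treat the exact member types as an assumption.

/* Approximate per-interface callback table, inferred from the call sites in
 * this file. INTEL_VGPU_EXECLIST_SUBMISSION indexes the only instance so far. */
struct intel_vgpu_submission_ops {
        const char *name;
        int  (*init)(struct intel_vgpu *vgpu);
        void (*clean)(struct intel_vgpu *vgpu);
        void (*reset)(struct intel_vgpu *vgpu, unsigned long engine_mask);
};

extern const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops;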
Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f3be88fa88dd..88ce57116a4c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -889,6 +889,26 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) kmem_cache_destroy(s->workloads); } + +/** + * intel_vgpu_reset_submission - reset submission-related resource for vGPU + * @vgpu: a vGPU + * @engine_mask: engines expected to be reset + * + * This function is called when a vGPU is being destroyed. + * + */ +void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, + unsigned long engine_mask) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + + if (!s->active) + return; + + s->ops->reset(vgpu, engine_mask); +} + /** * intel_vgpu_setup_submission - setup submission-related resource for vGPU * @vgpu: a vGPU -- cgit From e2c43c0111d54d4857d052fcfca9f3f16bf1b1b2 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Wed, 13 Sep 2017 01:58:35 +0800 Subject: drm/i915/gvt: Move clean_workloads() into scheduler.c Move clean_workloads() into scheduler.c since it's not specific to execlist. v2: - Remove clean_workloads in intel_vgpu_select_submission_ops. (Zhenyu) Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 88ce57116a4c..391690c0c28c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -644,6 +644,25 @@ static void update_guest_context(struct intel_vgpu_workload *workload) kunmap(page); } +static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_engine_cs *engine; + struct intel_vgpu_workload *pos, *n; + unsigned int tmp; + + /* free the unsubmited workloads in the queues. */ + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + list_for_each_entry_safe(pos, n, + &s->workload_q_head[engine->id], list) { + list_del_init(&pos->list); + intel_vgpu_destroy_workload(pos); + } + clear_bit(engine->id, s->shadow_ctx_desc_updated); + } +} + static void complete_current_workload(struct intel_gvt *gvt, int ring_id) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; @@ -707,6 +726,23 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) release_shadow_wa_ctx(&workload->wa_ctx); } + if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { + /* if workload->status is not successful means HW GPU + * has occurred GPU hang or something wrong with i915/GVT, + * and GVT won't inject context switch interrupt to guest. + * So this error is a vGPU hang actually to the guest. + * According to this we should emunlate a vGPU hang. If + * there are pending workloads which are already submitted + * from guest, we should clean them up like HW GPU does. + * + * if it is in middle of engine resetting, the pending + * workloads won't be submitted to HW GPU and will be + * cleaned up during the resetting process later, so doing + * the workload clean up here doesn't have any impact. 
+ **/ + clean_workloads(vgpu, ENGINE_MASK(ring_id)); + } + workload->complete(workload); atomic_dec(&s->running_workload_num); @@ -906,6 +942,7 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, if (!s->active) return; + clean_workloads(vgpu, engine_mask); s->ops->reset(vgpu, engine_mask); } -- cgit From f52c380a48f527930c86ea6fd7242873c93ba682 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 24 Sep 2017 21:53:03 +0800 Subject: drm/i915/gvt: Refine shadow batch buffer 1) Use standard i915 GEM object sequence to access the shadow batch buffer. 2) Manage i915 vma life cycle to solve one FIXME. v2: - Refine code structure. - Refine the usage of GEM APIs. - Add the missing lock/unlock in release_shadow_batch_buffer. Test on my SKL NuC. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 88 ++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 391690c0c28c..f2d4c90ea1d4 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -325,31 +325,46 @@ err_scan: return ret; } +static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload); + static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) { struct intel_gvt *gvt = workload->vgpu->gvt; const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd; - struct intel_shadow_bb_entry *entry_obj; + struct intel_vgpu_shadow_bb *bb; + int ret; - /* pin the gem object to ggtt */ - list_for_each_entry(entry_obj, &workload->shadow_bb, list) { - struct i915_vma *vma; + list_for_each_entry(bb, &workload->shadow_bb, list) { + bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0); + if (IS_ERR(bb->vma)) { + ret = PTR_ERR(bb->vma); + goto err; + } - vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); + /* relocate shadow batch buffer */ + bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); + if (gmadr_bytes == 8) + bb->bb_start_cmd_va[2] = 0; - /* FIXME: we are not tracking our pinned VMA leaving it - * up to the core to fix up the stray pin_count upon - * free. - */ + /* No one is going to touch shadow bb from now on. 
*/ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } - /* update the relocate gma with shadow batch buffer*/ - entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma); - if (gmadr_bytes == 8) - entry_obj->bb_start_cmd_va[2] = 0; + ret = i915_gem_object_set_to_gtt_domain(bb->obj, false); + if (ret) + goto err; + + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + i915_vma_move_to_active(bb->vma, workload->req, 0); } return 0; +err: + release_shadow_batch_buffer(workload); + return ret; } static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) @@ -410,22 +425,37 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) { - /* release all the shadow batch buffer */ - if (!list_empty(&workload->shadow_bb)) { - struct intel_shadow_bb_entry *entry_obj = - list_first_entry(&workload->shadow_bb, - struct intel_shadow_bb_entry, - list); - struct intel_shadow_bb_entry *temp; - - list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb, - list) { - i915_gem_object_unpin_map(entry_obj->obj); - i915_gem_object_put(entry_obj->obj); - list_del(&entry_obj->list); - kfree(entry_obj); + struct intel_vgpu *vgpu = workload->vgpu; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_vgpu_shadow_bb *bb, *pos; + + if (list_empty(&workload->shadow_bb)) + return; + + bb = list_first_entry(&workload->shadow_bb, + struct intel_vgpu_shadow_bb, list); + + mutex_lock(&dev_priv->drm.struct_mutex); + + list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { + if (bb->obj) { + if (bb->accessing) + i915_gem_obj_finish_shmem_access(bb->obj); + + if (bb->va && !IS_ERR(bb->va)) + i915_gem_object_unpin_map(bb->obj); + + if (bb->vma && !IS_ERR(bb->vma)) { + i915_vma_unpin(bb->vma); + i915_vma_close(bb->vma); + } + __i915_gem_object_release_unless_active(bb->obj); } + list_del(&bb->list); + kfree(bb); } + + mutex_unlock(&dev_priv->drm.struct_mutex); } static int prepare_workload(struct intel_vgpu_workload *workload) -- cgit From 9556e118889293f6d5d226b64688ee2adfd8964c Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 10 Oct 2017 13:51:32 +0800 Subject: drm/i915/gvt: Use I915_GTT_PAGE_SIZE As there is already an I915_GTT_PAGE_SIZE marco in i915, let GVT-g use it as well. Also this patch re-names some GTT marcos with additional prefix. 
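The substitution in the diff below is mechanical; both the retired GVT-local macros and their i915 replacements describe the same 4 KiB GTT page. The old values shown here are assumed from the GVT gtt header, which is not part of this excerpt.

/* Retired GVT-local macros (assumed definitions): */
#define GTT_PAGE_SHIFT          12
#define GTT_PAGE_SIZE           (1 << GTT_PAGE_SHIFT)
/* Replaced from here on by the i915 core's I915_GTT_PAGE_SHIFT / I915_GTT_PAGE_SIZE. */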
Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f2d4c90ea1d4..7a1ffaa9ae06 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -81,7 +81,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) while (i < context_page_num) { context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((workload->ctx_desc.lrca + i) << - GTT_PAGE_SHIFT)); + I915_GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("Invalid guest context descriptor\n"); return -EFAULT; @@ -90,7 +90,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, - GTT_PAGE_SIZE); + I915_GTT_PAGE_SIZE); kunmap(page); i++; } @@ -120,7 +120,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), (void *)shadow_ring_context + sizeof(*shadow_ring_context), - GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); + I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); kunmap(page); return 0; @@ -635,7 +635,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) while (i < context_page_num) { context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((workload->ctx_desc.lrca + i) << - GTT_PAGE_SHIFT)); + I915_GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("invalid guest context descriptor\n"); return; @@ -644,7 +644,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, - GTT_PAGE_SIZE); + I915_GTT_PAGE_SIZE); kunmap(page); i++; } @@ -669,7 +669,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), (void *)shadow_ring_context + sizeof(*shadow_ring_context), - GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); + I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); kunmap(page); } @@ -1198,7 +1198,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, int ret; ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, - (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT)); + (u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT)); if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); return ERR_PTR(-EINVAL); -- cgit From 295764cd2ff41e2c1bc8af4050de77cec5e7a1c0 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 7 Nov 2017 05:23:02 +0800 Subject: drm/i915/gvt: Limit read hw reg to active vgpu mmio_read_from_hw() let vgpu could read hw reg, if vgpu's workload is running on hw, things is good. Otherwise vgpu will get other vgpu's reg val, it is unsafe. This patch limit such hw access to active vgpu. If vgpu isn't running on hw, the reg read of this vgpu will get the last active val which saved at schedule_out. v2: ring timestamp is walking continuously even if the ring is idle. so read hw directly. 
(Zhenyu) Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 7a1ffaa9ae06..9749113fccdd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -131,6 +131,20 @@ static inline bool is_gvt_request(struct drm_i915_gem_request *req) return i915_gem_context_force_single_submission(req->ctx); } +static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u32 ring_base = dev_priv->engine[ring_id]->mmio_base; + i915_reg_t reg; + + reg = RING_INSTDONE(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); + reg = RING_ACTHD(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); + reg = RING_ACTHD_UDW(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); +} + static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { @@ -175,6 +189,7 @@ static int shadow_context_status_change(struct notifier_block *nb, break; case INTEL_CONTEXT_SCHEDULE_OUT: case INTEL_CONTEXT_SCHEDULE_PREEMPTED: + save_ring_hw_state(workload->vgpu, ring_id); atomic_set(&workload->shadow_ctx_active, 0); break; default: -- cgit From f2880e04f3a5419366926182fc97a3c2e4fd8f2a Mon Sep 17 00:00:00 2001 From: fred gao Date: Tue, 14 Nov 2017 17:09:35 +0800 Subject: drm/i915/gvt: Move request alloc to dispatch_workload path only Previously the performance is improved through the workload auditing and shadowing ahead of vGPU scheduling, however, there is the case that more requests are allocated in submit_context before the previous request is added, the timeline will hold its seqno which is later. This patch is to move the request alloc to dispatch_workload function, where is the same place as request is added. It will fix the issue of kernel BUG for (timeline->seqno != request->fence.seqno) check when add_request. 
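A simplified restatement of the ordering problem being fixed, for two workloads w1 and w2; the request/timeline bookkeeping is i915-internal and paraphrased from the commit message, not taken from these diffs.

/*
 * Old flow (simplified, assumed):
 *   audit/shadow w1 -> i915_gem_request_alloc() for w1
 *   audit/shadow w2 -> i915_gem_request_alloc() for w2  (timeline seqno advances)
 *   dispatch w1     -> add w1's request -> timeline->seqno no longer matches
 *                      w1's fence.seqno -> BUG at add_request
 *
 * New flow: the request is allocated in intel_gvt_generate_request(), called
 * from the dispatch path right before the request is added, so allocation and
 * add stay paired per workload.
 */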
Fixes: 89ea20b930cb ("drm/i915/gvt: Factor out scan and shadow from workload dispatch") Signed-off-by: Chuanxiao Dong Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 9749113fccdd..a742b364c2c3 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -270,7 +270,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct drm_i915_gem_request *rq; struct intel_ring *ring; int ret; @@ -315,6 +314,27 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = populate_shadow_context(workload); if (ret) goto err_unpin; + workload->shadowed = true; + return 0; + +err_unpin: + engine->context_unpin(engine, shadow_ctx); +err_shadow: + release_shadow_wa_ctx(&workload->wa_ctx); +err_scan: + return ret; +} + +static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) +{ + int ring_id = workload->ring_id; + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; + struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + int ret; rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { @@ -329,14 +349,11 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = copy_workload_to_ring_buffer(workload); if (ret) goto err_unpin; - workload->shadowed = true; return 0; err_unpin: engine->context_unpin(engine, shadow_ctx); -err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_scan: return ret; } @@ -496,6 +513,12 @@ static int prepare_workload(struct intel_vgpu_workload *workload) goto err_unpin_mm; } + ret = intel_gvt_generate_request(workload); + if (ret) { + gvt_vgpu_err("fail to generate request\n"); + goto err_unpin_mm; + } + ret = prepare_shadow_batch_buffer(workload); if (ret) { gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); -- cgit
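To close, a commented outline of the per-workload path through scheduler.c as it stands at the end of this series; the ordering is read off the final state of the diffs above and is meant as a reading aid, not authoritative documentation.

/*
 * intel_vgpu_setup_submission()       - shadow context, workload slab, queues
 * intel_vgpu_select_submission_ops()  - pick e.g. the execlist submission ops
 *
 * intel_vgpu_create_workload(vgpu, ring_id, desc)
 *   -> alloc_workload(), prepare_mm(); the first queued workload is scanned
 *      and shadowed immediately (intel_gvt_scan_and_shadow_workload()).
 *
 * workload_thread: pick_next_workload() -> dispatch_workload()
 *   -> scan/shadow if not already done, then prepare_workload():
 *        intel_vgpu_pin_mm(), sync_oos_pages(), flush_post_shadow(),
 *        intel_gvt_generate_request(), prepare_shadow_batch_buffer(),
 *        prepare_shadow_wa_ctx(), workload->prepare()
 *
 * complete_current_workload()
 *   -> update_guest_context() on success, release shadow buffers,
 *      clean_workloads() on error or engine reset, workload->complete()
 *
 * intel_vgpu_reset_submission() / intel_vgpu_clean_submission()
 *   -> clean_workloads() + ops->reset() / ops->clean(), free slab and context.
 */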